# Load packages
library(sf)
library(xml2)
library(tidyverse)
library(tidybayes)
library(brms)
library(vegan)
library(kableExtra)
library(treemapify)
library(ggrepel)
# Define genus level taxon groups (plus one family FAVI)
taxon_groups <- list(
PORI = c("PPOR", "PFUR", "PDIV", "PAST", "PORI"),
ORBI = c("OFAV", "OANN", "OFRA", "ORBI"),
#FAVI = c("CNAT", "DLAB", "PSTR", "PCLI", "MARE", "FAVI"),
AGAR = c("AFRA", "AAGA", "AHUM", "ALAM", "AGAR"),
MADR = c("MAUR", "MSEN", "MDEC", "MPHA", "MADR"),
SOLE = c("SHYA", "SBOU", "SOLE"),
SCOL = c("SLAC", "SCUB", "SCOL"),
SIDE = c("SSID", "SRAD", "SIDE"),
MYCE = c("MFER", "MLAM", "MALI", "MYCE"),
OCUL = c("OROB", "ODIF", "OCUL")
)
# Convert to lookup tibble
taxon_lookup <- enframe(taxon_groups, name = "taxon_group", value = "taxon") %>%
unnest(taxon)
# Define juvenile family level taxon groups (following DRM survey convention)
taxon_groups_juv <- list(
MUSS = c("ISIN", "ISOP", "MANG", "MYCE", "SCOL", "MUSS", "MALI", "MFER"),
FAVI = c("FAVI", "FFRA", "MARE", "DLAB", "PSTR", "PCLI", "CNAT"),
MEAN = c("MMEA", "MEAN", "DCYL", "DSTO", "EFAS")
)
# Convert to lookup tibble
taxon_lookup_juv <- enframe(taxon_groups_juv, name = "taxon_group", value = "taxon") %>%
unnest(taxon)
# Order levels of Habitat Types
type_levels <- c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef",
"Outer Reef")
type_labels <- c("NRC", "IR", "MR", "OR", "APR")
names(type_labels) <- type_levels
# Order survey datasets
dataset_levels <- c("nsu11_esa", "dca17_esa", "dca17", "tt21", "tt23", "drm24")
dataset_labels <- c("2011-NSU-ESA", "2017-DCA-ESA", "2017—DCA", "2021—TT", "2023—TT", "2024—Shedd")
names(dataset_labels) <- dataset_levels
# Import data
nsu11_esa0 <- readxl::read_xlsx("data/2011_nsu_esa/Port Everglades_NSU 2011and DCA 2017_ESA surveys.xlsx",
sheet = "2011_NSU_ESA survey") %>%
janitor::clean_names()
# Site metadata
nsu11_esa_sitemd <- nsu11_esa0 %>%
select(site = ident, latitude = lat, longitude = long) %>%
mutate(site = as.character(site))
# ESA coral count data
## DO NOT KEEP M.FEROX DATA -- ALL OTHER SURVEYS ALL MYCETOPHYLLIA ARE ALICIAE, so cannot combine at 'MYCE' (genus) level
nsu11_esa_counts <- nsu11_esa0 %>%
select(site = ident,
ACER = a_cervic_1, OANN = m_annula_1, OFAV = m_faveol_1, OFRA = m_franks_1, DSTO = d_stokes_1) %>%
pivot_longer(-site, names_to = "taxon", values_to = "n") %>%
# Assume all were >4cm since no sizes are reported
mutate(class = ">4cm") %>%
mutate(site = as.character(site))
# Aggregate taxa (multiple orbicella observed --> ORBI)
nsu11_esa_counts_ag <- nsu11_esa_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
library(sf)
# Read KML file
dca17_esa_sitemd0 <- st_read("data/2017_dca_esa/Listed_Coral_Survey_2017.kml")
## Reading layer `Listed_Coral_Survey_2017' from data source
## `/Users/rcunning/Projects/PENIP/data/2017_dca_esa/Listed_Coral_Survey_2017.kml'
## using driver `KML'
## Simple feature collection with 149 features and 2 fields
## Geometry type: POINT
## Dimension: XYZ
## Bounding box: xmin: -80.10416 ymin: 26.08429 xmax: -80.08178 ymax: 26.10336
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
# Extract coordinates and save as CSV
dca17_esa_sitemd <- dca17_esa_sitemd0 %>%
mutate(site = Name,
longitude = st_coordinates(.)[, 1],
latitude = st_coordinates(.)[, 2]) %>%
st_drop_geometry() %>%
select(site, longitude, latitude) %>%
as_tibble() %>%
mutate(site = as.character(site))
# Get coral data
# Appendix D -- contains individual colony sizes
# what is the "NUMBER" column? ignore for now.
dca17_esa0 <- readxl::read_xlsx("data/2017_dca_esa/Appendix D_ESA_listed_coral_Plotted_Locations.xlsx") %>%
janitor::clean_names() %>%
select(site = site_id, taxon = species, length_cm, width_cm, height_cm, m2_of_habi, density) %>%
mutate(across(ends_with("cm"), as.numeric))
# Assign size classes
dca17_esa <- dca17_esa0 %>%
mutate(max_dim_cm = pmax(length_cm, width_cm, height_cm, na.rm = TRUE),
class = if_else(max_dim_cm >= 4, ">4cm", "<4cm"))
# Count taxon and size class per site
dca17_esa_counts <- dca17_esa %>%
count(site, taxon, class) %>%
mutate(site = as.character(site))
# Total area surveyed per site = 784 m2 (crossed 100x4m belt transects with 16m2 overlap)
# Agreggate taxa (OFAV -> ORBI)
dca17_esa_counts_ag <- dca17_esa_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Merge with site metadata to get zero counts
# Step 1: Define all taxon-class combos you want to include
# This can be pulled from your full dataset (like dfftaxon), or hardcoded:
all_taxon_class <- expand_grid(
taxon = c("ACER", "ORBI"), # example taxa
class = c("<4cm", ">4cm")
)
# Step 2: Get all site IDs from the site metadata
all_sites <- dca17_esa_sitemd %>%
distinct(site)
# Step 3: Create full site × taxon × class combinations
full_grid <- expand_grid(
site = all_sites$site,
all_taxon_class
)
# Step 4: Left join with observed counts and fill missing with 0
dca17_esa_counts_ag_full <- full_grid %>%
left_join(dca17_esa_counts_ag, by = c("site", "taxon", "class")) %>%
mutate(n = replace_na(n, 0))
DCA site metadata
# Site metadata
# Read in site coordinates
dca17_sitemd0 <- readxl::read_xlsx("data/2017_dca_recon/Recon_Site_Coordinates_Extracted.xlsx") %>%
janitor::clean_names()
# All sites have start and end coordinates...
# Tidy and Calculate midpoint per transect
dca17_sitemd <- dca17_sitemd0 %>%
mutate(
depth = abs(as.numeric(depth)),
across(c(latitude, longitude), as.numeric)
) %>%
group_by(site = transect) %>%
summarize(
latitude = mean(latitude, na.rm = TRUE),
longitude = mean(longitude, na.rm = TRUE),
depth = mean(depth, na.rm = TRUE),
.groups = "drop"
)
DCA coral data
# Read in survey data
dca170 <- readxl::read_xlsx("data/2017_dca_recon/Compiled_DCA_RECON_Belt_data.xlsx") %>%
janitor::clean_names()
dca17 <- dca170 %>%
select(1:18) %>%
rename(site = site_name) %>%
mutate(site = factor(site)) %>%
select(site, taxon = coral_species, max_width_cm = max_size_cm)
# Adjust/corrects species IDs
sort(unique(dca17$taxon))
## [1] "AAGA" "ACER" "AFRA"
## [4] "AGA SP" "AHUM" "ALAM"
## [7] "CNAT" "CORAL" "Cup Coral"
## [10] "DLAB" "DSTO" "EFAS"
## [13] "LCUC" "MAD SP" "MADSP"
## [16] "MALI" "MARE" "MCAV"
## [19] "MDEC" "MLAM" "MMEA"
## [22] "MPHA" "Mycetophyllia spp." "MYCSP"
## [25] "OANN" "ODIF" "OFAV"
## [28] "OFAV\\" "PAST" "PCLI"
## [31] "PFUR" "PPOR" "PSTR"
## [34] "SBOU" "Scolymia Spp" "SCUB"
## [37] "Sid SP" "SID SP" "SID SP."
## [40] "SIDSP" "SINT" "SRAD"
## [43] "SSID"
dca17 <- dca17 %>%
mutate(taxon = case_when(
taxon == "AGA SP" ~ "AGAR",
taxon == "LCUC" ~ "HCUC",
taxon %in% c("MYCSP", "Mycetophyllia spp.") ~ "MYCE",
taxon == "OFAV\\" ~ "OFAV",
taxon %in% c("MAD SP", "MADSP") ~ "MADR",
taxon == "Scolymia Spp" ~ "SCOL",
taxon %in% c("SIDSP", "Sid SP", "SID SP.", "SID SP") ~ "SIDE",
TRUE ~ taxon
))
sort(unique(dca17$taxon))
## [1] "AAGA" "ACER" "AFRA" "AGAR" "AHUM" "ALAM"
## [7] "CNAT" "CORAL" "Cup Coral" "DLAB" "DSTO" "EFAS"
## [13] "HCUC" "MADR" "MALI" "MARE" "MCAV" "MDEC"
## [19] "MLAM" "MMEA" "MPHA" "MYCE" "OANN" "ODIF"
## [25] "OFAV" "PAST" "PCLI" "PFUR" "PPOR" "PSTR"
## [31] "SBOU" "SCOL" "SCUB" "SIDE" "SINT" "SRAD"
## [37] "SSID"
# Filter out unidentified corals
dca17 <- dca17 %>%
filter(!taxon %in% c("CORAL", "Cup Coral"))
# Write long data to file
write_csv(dca17, file = "data/processed/dca_2017_long.csv")
# Convert to count data
# Add explicit zeros for any taxon/size class missing at any site
dca17_counts <- dca17 %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# # Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(dca17_counts, file = "data/processed/dca_2017_counts.csv")
# Aggregate count data based on taxonomic groups defined above
dca17_counts_ag <- dca17_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
dca17_counts_ag <- dca17_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
dca17_counts_ag %>%
distinct(taxon, class) %>%
arrange(taxon, class)
## # A tibble: 35 × 2
## taxon class
## <chr> <chr>
## 1 ACER <4cm
## 2 ACER >4cm
## 3 AGAR <4cm
## 4 AGAR >4cm
## 5 CNAT >4cm
## 6 DLAB >4cm
## 7 DSTO >4cm
## 8 EFAS >4cm
## 9 FAVI <4cm
## 10 HCUC <4cm
## # ℹ 25 more rows
write_csv(dca17_counts_ag, file = "data/processed/dca_2017_counts_ag.csv")
2021 TT site metadata
# site metadata
tt21_sitemd0 <- read_csv("data/2021_tt_recon_esa/midpoints_latlon.csv") %>%
janitor::clean_names()
tt21_sitemd <- tt21_sitemd0 %>%
mutate(name = str_remove(name, "A$")) %>%
select(site = name, longitude = lon, latitude = lat)
# There was a lot of sand in these transects that was quantified in final RECON report (though these were the same transects for the ESA and RECON datasets). Extracted this data from RECON report, Table 2:
tt21_sand <- read_csv("data/2021_tt_recon_esa/Table_2_Port_Everglades_RECON.csv") %>%
janitor::clean_names()
tt21_m_nosand <- tt21_sand %>%
mutate(area_m2 = 30 - meters_of_sc_sp) %>%
mutate(site = as.character(site))
2021 TT coral data
# coral data - recon belt transects
tt21recon0 <- readxl::read_xlsx("data/2021_tt_recon_esa/Recon 30x1m Coral Belt Transect.xlsx") %>%
janitor::clean_names()
tt21recon <- tt21recon0 %>%
select(site, taxon = id_abbrev, coral_length_cm, coral_width_cm) %>%
mutate(taxon = toupper(taxon), site = factor(site)) %>%
mutate(across(c(coral_length_cm, coral_width_cm), as.numeric)) %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# ESA survey data
tt21esa0 <- readxl::read_xlsx("data/2021_tt_recon_esa/ESA Coral Belt Transect.xlsx") %>%
janitor::clean_names()
sort(unique(tt21esa0$esa_id))
## [1] "0.0" "Acropora cervicornis" "ML QC Not ESA"
## [4] "Orbicella faveolata" "Orbicella franksi" "Outside Belt"
tt21esa <- tt21esa0 %>%
mutate(site = factor(site),
taxon = case_when(
esa_id == "Orbicella franksi" ~ "OFRA",
esa_id == "Orbicella faveolata" ~ "OFAV",
esa_id == "Acropora cervicornis" ~ "ACER")) %>%
filter(!is.na(taxon)) %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# Combine Recon and ESA survey data
tt21 <- bind_rows(tt21recon, tt21esa)
# Check taxa names
sort(unique(tt21recon$taxon))
## [1] "0" "AAGA"
## [3] "AFRAG" "ALAM"
## [5] "CNAT" "DLAB"
## [7] "DSTO" "EFAS"
## [9] "FFRAG" "JUVENILE-UNIDENTIFIABLE"
## [11] "MALC" "MANG"
## [13] "MCAV" "MDEC"
## [15] "MMEA" "MPHA"
## [17] "MYALI" "MYLAM"
## [19] "ODIF/OROB" "PAST"
## [21] "PDCLIV" "PDSTR"
## [23] "PHYLLANGIA AMERICANA" "PPOR"
## [25] "SBOU" "SCOLYMIA CUBENSIS"
## [27] "SCOLYMIA LACERA" "SINT"
## [29] "SRAD" "SSID"
## [31] "XESTO"
# Filter out unidentified OR NON-CORAL taxa
tt21 <- tt21 %>%
filter(!taxon %in% c("0", "JUVENILE-UNIDENTIFIABLE", "XESTO", "MALC"))
# Adjust/corrects species IDs
tt21 <- tt21 %>%
mutate(taxon = case_when(
taxon == "AFRAG" ~ "AFRA",
taxon == "FFRAG" ~ "FFRA",
taxon == "MYALI" ~ "MALI",
taxon == "MYFER" ~ "MFER",
taxon == "MYLAM" ~ "MLAM",
taxon == "ODIF/OROB" ~ "OCUL",
taxon == "PDCLIV" ~ "PCLI",
taxon == "PDSTR" ~ "PSTR",
taxon == "PHYLLANGIA AMERICANA" ~ "PAME",
taxon == "SCOLYMIA CUBENSIS" ~ "SCUB",
taxon == "SCOLYMIA LACERA" ~ "SLAC",
TRUE ~ taxon
))
sort(unique(tt21$taxon))
## [1] "AAGA" "ACER" "AFRA" "ALAM" "CNAT" "DLAB" "DSTO" "EFAS" "FFRA" "MALI"
## [11] "MANG" "MCAV" "MDEC" "MLAM" "MMEA" "MPHA" "OCUL" "OFAV" "OFRA" "PAME"
## [21] "PAST" "PCLI" "PPOR" "PSTR" "SBOU" "SCUB" "SINT" "SLAC" "SRAD" "SSID"
# Write long data to file
write_csv(tt21, file = "data/processed/tt_2021_long.csv")
# Count
# Add explicit zeros for any taxon/size class missing at any site
tt21_counts <- tt21 %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(tt21_counts, file = "data/processed/tt_2021_counts.csv")
# Aggregate count data based on taxonomic groups defined above
tt21_counts_ag <- tt21_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
tt21_counts_ag <- tt21_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(tt21_counts_ag, file = "data/processed/tt21_counts_ag.csv")
TT site metadata
# Site metadata
tt23_sitemd <- readxl::read_xlsx("data/2023_tt_impact/Impact site tracking.xlsx", skip = 1) %>%
janitor::clean_names()
tt23_sitemd <- tt23_sitemd %>%
mutate(site = transect_name,
latitude = as.numeric(actual_start_y),
longitude = as.numeric(actual_start_x)) %>%
select(site, latitude, longitude)
# Many sites missing coords in sheet.... whats up with that
tt23_sitemd <- drop_na(tt23_sitemd, longitude)
TT coral data
tt230 <- readxl::read_xlsx("data/2023_tt_impact/Impact Raw Data 05 31 2024.xlsx") %>%
janitor::clean_names()
tt23 <- tt230 %>%
select(site = transect_name, depth_ft_start,
taxon = id_abbrev, coral_length_cm, coral_width_cm) %>%
filter(taxon != "Xesto") %>%
mutate(taxon = toupper(taxon)) %>%
mutate(across(c(coral_length_cm, coral_width_cm), as.numeric)) %>%
mutate(site = factor(site))
tt23 <- tt23 %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# Check taxa names
sort(unique(tt23$taxon))
## [1] "?" "AAGA" "ACER" "AFRA" "AFRAG" "ALAM"
## [7] "ASP" "ASP." "CNAT" "DLAB" "DSTO" "EFAS"
## [13] "FFRA" "HCUC" "ID-ABBREV" "MCAV" "MCAV?" "MDEC"
## [19] "MHEARD" "MMEA" "MSEN" "MSP." "MUSSID" "MYALI"
## [25] "MYFER" "MYLAM" "NONE" "OFAV" "OFR" "OFRA"
## [31] "OROB" "PAME" "PAST" "PCLI" "PCLI?" "PDIV"
## [37] "PPOR" "PSP" "PSP." "PSTR" "SBOU" "SCUB"
## [43] "SHYA" "SINT" "SLAC" "SRAD" "SSID" "SSP."
## [49] "STOK"
# Filter out unidentified taxa
tt23 <- tt23 %>%
filter(!taxon %in% c("?", "ID-ABBREV", "NONE", "MHEARD"))
# Adjust/corrects species IDs
tt23 <- tt23 %>%
mutate(taxon = case_when(
taxon == "AFRAG" ~ "AFRA",
taxon %in% c("ASP", "ASP.") ~ "AGAR",
taxon == "MCAV?" ~ "MCAV",
taxon == "MSP." ~ "MADR",
taxon == "MUSSID" ~ "MUSS",
taxon == "MYALI" ~ "MALI",
taxon == "MYFER" ~ "MFER",
taxon == "MYLAM" ~ "MLAM",
taxon == "OFR" ~ "OFRA",
taxon == "PCLI?" ~ "PCLI",
taxon %in% c("PSP", "PSP.") ~ "PORI",
taxon == "SSP." ~ "SIDE",
taxon == "STOK" ~ "DSTO",
TRUE ~ taxon
))
sort(unique(tt23$taxon))
## [1] "AAGA" "ACER" "AFRA" "AGAR" "ALAM" "CNAT" "DLAB" "DSTO" "EFAS" "FFRA"
## [11] "HCUC" "MADR" "MALI" "MCAV" "MDEC" "MFER" "MLAM" "MMEA" "MSEN" "MUSS"
## [21] "OFAV" "OFRA" "OROB" "PAME" "PAST" "PCLI" "PDIV" "PORI" "PPOR" "PSTR"
## [31] "SBOU" "SCUB" "SHYA" "SIDE" "SINT" "SLAC" "SRAD" "SSID"
# Write long data to file
write_csv(tt23, file = "data/processed/tt_2024_long.csv")
# Count
# Add explicit zeros for any taxon/size class missing at any site
tt23_counts <- tt23 %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(tt23_counts, file = "data/processed/tt_2024_counts.csv")
# Aggregate count data based on taxonomic groups defined above
tt23_counts_ag <- tt23_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
tt23_counts_ag <- tt23_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(tt23_counts_ag, file = "data/processed/tt23_counts_ag.csv")
Shedd site metadata
# Site metadata
drm24_sitemd <- readxl::read_xlsx("data/2024_shedd_drm/site_metadata.xlsx") %>%
janitor::clean_names() %>%
mutate(site = as.character(drm_site_id)) %>%
select(site, latitude = lat, longitude = lon) %>%
mutate(depth = NA)
Shedd coral data
# Adult coral data from main DRM surveys
#adults0 <- read_csv("data/2024_shedd_drm/DRM_broward_corals.csv")
#adults0 %>% filter(team == "Shedd Aquarium")
alldrm2024 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect1and2_Shedd.xlsx") %>%
janitor::clean_names() %>%
distinct(site, team, date, subregion)
shedddrm2024 <- alldrm2024 %>% filter(team == "Shedd Aquarium")
# shedddrm2024 %>%
# count(subregion, date)
# Most sites were included in main DRM database for 2024 -- Import these
adultst1t2 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect1and2_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium") %>%
select(site, transect_num, species, width, height)
adultst3t4 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect3and4_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium") %>%
select(site, transect_num, species, width, height)
# 9 of our PEV sites were removed from DRM database to avoid oversaturating the ares -- Import these separately
removedt1t2 <- readxl::read_xlsx(
"data/2024_shedd_drm/2024_DRM_Broward_RemovedSites_T1-T4_Shedd.xlsx", sheet = "Removed Sites T1-T2") %>%
janitor::clean_names() %>%
select(site, transect_num, species, width, height)
removedt3t4 <- readxl::read_xlsx(
"data/2024_shedd_drm/2024_DRM_Broward_RemovedSites_T1-T4_Shedd.xlsx", sheet = "Removed Sites T3-T4") %>%
janitor::clean_names() %>%
select(site, transect_num, species, width, height)
# Combine all adult coral data for Shedd DRM surveys at PEV
adults0 <- bind_rows(adultst1t2, adultst3t4, removedt1t2, removedt3t4)
# Convert adult data to long format
adults_long <- adults0 %>%
mutate(max_width_cm = pmax(width, height, na.rm = TRUE)) %>%
mutate(max_width_cm = as.character(max_width_cm)) %>%
mutate(site = str_remove(site, "^AA")) %>%
select(site, transect_num, taxon = species, max_width_cm) %>%
drop_na(taxon) # DROPS when taxon is blank, this is when no corals >4cm were observed
# Import juvenile counts from main DRM dataset
juv <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_JuvenileCoralData_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium")
# Import juvenile counts from sites that were removed from main DRM dataset
removed_juv <- readxl::read_xlsx("data/2024_shedd_drm/Shedd_removed_sites_Juveniles_2024.xlsx") %>%
janitor::clean_names() %>%
# Missing values in count data should be zero counts (unique to this datasheet from FWC)
mutate(across(ends_with("_ct"), ~replace_na(., 0)))
# Combine juvenile data
juv0 <- bind_rows(juv, removed_juv) %>%
mutate(site = str_remove(site, "^AA")) %>%
select(site, transect_num, ends_with("ct")) %>%
rename(MCAV = montastraea_ct, MUSS = mussinae_ct, FAVI = faviinae_ct, MEAN = meandrinidae_ct)
# Convert juvenile data to long format
juv_long <- juv0 %>%
pivot_longer(c(MUSS, FAVI, MEAN, MCAV), names_to = "taxon", values_to = "n") %>%
mutate(max_width_cm = "<4") %>%
uncount(n)
# Other juvenile taxa counts from Transects 1 and 2 (DRM 'bonus data')
t1t2bonus <- read_csv("data/2024_shedd_drm/T1_T2_bonus_data.csv") %>%
janitor::clean_names() %>%
mutate(site = replace_na(site, "NA")) %>% # Because one site is called "NA"
mutate(transect_num = parse_number(transect))
t1t2juv <- t1t2bonus %>%
select(site, transect_num, starts_with("small")) %>%
rename_with(~ toupper(gsub("^small_", "", .x)), starts_with("small_"))
t1t2juv_long <- t1t2juv %>%
pivot_longer(3:10, names_to = "taxon", values_to = "n") %>%
mutate(max_width_cm = "<4") %>%
uncount(n)
# Replace site names in t1t2 bonus data with the correct DRM site ID
penipsites <- readxl::read_xlsx("data/2024_shedd_drm/site_metadata.xlsx") %>%
janitor::clean_names()
t1t2juv_long_updated <- t1t2juv_long %>%
left_join(penipsites %>% select(site, drm_site_id), by = "site") %>%
mutate(site = as.character(drm_site_id)) %>%
select(-drm_site_id)
# Combine all data
drm24_long <- bind_rows(adults_long, juv_long, t1t2juv_long_updated) %>%
mutate(team = "Shedd Aquarium")
# Check species names
sort(unique(drm24_long$taxon))
## [1] "AAGA" "ACER" "AFRA" "CNAT" "DLAB" "DSTO" "EFAS" "FAVI" "FFRA" "MALI"
## [11] "MAUR" "MCAV" "MDEC" "MEAN" "MMEA" "MUSS" "OANN" "OFAV" "OFRA" "PAST"
## [21] "PCLI" "PFUR" "PPOR" "PSTR" "SBOU" "SCUB" "SINT" "SRAD" "SSID"
write_csv(drm24_long, file = "data/processed/drm_2024_long.csv")
# COUNT based on rules
# ✅ Updated Rules Summary for counting from DRM/Shedd data:
# Juvenile taxa (searched for in <4cm size class only):
# "MEAN", "MUSS", "FAVI"
# → these should only ever appear in <4cm, never >4cm, and should not be zero-filled for adults.
# Other juvenile-capable taxa:
# "MCAV", "SSID", "SRAD", "PAST", "PPOR", "SINT", "SBOU", "AAGA", "MAUR"
# → these can be counted in both >4cm and <4cm, but only in <4cm if juveniles were searched on that transect and team.
# Transect-based search rules still apply:
# Transects 1 & 2: all adult taxa always searched. Juvenile search depends on team:
# "Shedd Aquarium" → all juvenile taxa above searched
# others → only MEAN, MUSS, FAVI, MCAV
# Transects 3 & 4:
# only subset of adult taxa searched (adult_taxa_t3t4)
# only juveniles: MEAN, MUSS, FAVI, MCAV
# Step 1: Define size classes
drm24_classed <- drm24_long %>%
mutate(class = case_when(as.numeric(max_width_cm) >= 4 ~ ">4cm",
max_width_cm == "<4" ~ "<4cm"))
# Step 2: Define species sets
all_taxa <- unique(drm24_classed$taxon)
adult_taxa_t3t4 <- c("CNAT", "DSTO", "DLAB", "MMEA", "MANG", "MALI",
"MFER", "MLAM", "PCLI", "PSTR")
juv_only_taxa <- c("MEAN", "MUSS", "FAVI")
juv_both_taxa <- c("MCAV", "SSID", "SRAD", "PAST", "PPOR", "SINT", "SBOU", "AAGA", "MAUR")
all_juv_taxa <- c(juv_only_taxa, juv_both_taxa)
# Step 3: Build search grid per site × transect × team
search_grid <- drm24_classed %>%
distinct(site, team) %>% # if multiple teams in data, remove value for team
expand_grid(transect_num = 1:4) %>% # creates search grid for all transects even if no corals observed (bc absent from drm24_classed)
mutate(
searched_taxa_class = pmap(list(transect_num, team), function(transect, team) {
# Helper: define juv taxa allowed for this transect/team
juv_taxa <- if (transect %in% c(1, 2)) {
if (team == "Shedd Aquarium") { # Shedd searched for other juv taxa on T1 and T2, other DRM survey teams did not
all_juv_taxa
} else {
c(juv_only_taxa, "MCAV")
}
} else {
c(juv_only_taxa, "MCAV")
}
# Adults always searched in 1 & 2, subset in 3 & 4
adult_taxa <- if (transect %in% c(1, 2)) {
setdiff(all_taxa, juv_only_taxa) # exclude juv-only taxa
} else {
adult_taxa_t3t4
}
# Build grid
bind_rows(
expand_grid(taxon = adult_taxa, class = ">4cm"),
expand_grid(taxon = juv_taxa, class = "<4cm")
)
})
) %>%
unnest(searched_taxa_class)
# Step 4: Count observations
counts <- drm24_classed %>%
group_by(site, transect_num, team, taxon, class) %>%
summarize(n = n(), .groups = "drop")
# Step 5: Join with grid and fill in zeros where appropriate
final_counts <- search_grid %>%
left_join(counts, by = c("site", "transect_num", "team", "taxon", "class")) %>%
mutate(n = replace_na(n, 0))
write_csv(final_counts, file = "data/processed/drm_2024_counts.csv")
# AGGREGATE COUNT DATA
# Aggregate taxa
drm24_counts_ag <- final_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(drm24_counts_ag, file = "data/processed/drm_2024_counts_ag.csv")
# Modify DRM2024 dataset to code as one pseudo-transect per site, since all other datasets have only one transect per site. We need the structure to be uniform across datasets for count modeling -- cannot include random effect for site to account for multiple transects per site while all other datasets only have one transect per site, because then there's no variability associated with site and its essentially treated as a fixed effect, which we don't want. Thus, we need to aggregate across transects in the DRM 2024 dataset. But since different taxa were searched for on different numbers of transects we need to do this carefully and have the appropriate total area searched for each taxon for teh pseudo-transect.
# Assign uniform transect area
drm24_counts_ag <- drm24_counts_ag %>%
mutate(transect_area_m2 = 10)
# Aggregate to pseudo-transect per site × taxon × class
drm24_counts_agg <- drm24_counts_ag %>%
group_by(site, taxon, class) %>%
summarise(
n = sum(n, na.rm = TRUE),
transect_area_m2 = sum(transect_area_m2),
.groups = "drop"
)
## Exclude two sites that were selected specifically because of known ACER populations
drm24_counts_agg <- drm24_counts_agg %>%
filter(!site %in% c("3096", "3094"))
# --- STEP 1: Load KML Polygons ---
polygons <- st_read("data/Habitat classifications.kml") # update path as needed
## Reading layer `Habitat classifications' from data source
## `/Users/rcunning/Projects/PENIP/data/Habitat classifications.kml'
## using driver `KML'
## Simple feature collection with 190 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XYZ
## Bounding box: xmin: -80.11618 ymin: 25.97477 xmax: -80.06143 ymax: 26.25943
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
# --- STEP 2: Extract Attributes from HTML Description ---
extract_attrs <- function(desc) {
if (is.na(desc) || desc == "") {
return(tibble(Habitat = NA, Type = NA, Modifier = NA, Region = NA, Type2 = NA))
}
html <- read_html(desc)
rows <- xml_find_all(html, "//table//table//tr")
keys <- rows %>% xml_find_all(".//td[1]") %>% xml_text(trim = TRUE)
vals <- rows %>% xml_find_all(".//td[2]") %>% xml_text(trim = TRUE)
n <- min(length(keys), length(vals))
named_vals <- set_names(vals[1:n], keys[1:n])
tibble(
Habitat = named_vals[["Habitat"]],
Type = named_vals[["Type"]],
Modifier = named_vals[["Modifier"]],
Region = named_vals[["Region"]],
Type2 = named_vals[["Type2"]]
)
}
# Apply function and combine with spatial geometries
attrs <- map_dfr(polygons$Description, extract_attrs)
polygons_clean <- bind_cols(polygons %>% select(-Description), attrs)
###### GET RID OF OVERLAPPING SAND POLYGONS
# 1. Set clean CRS and define area of interest (AOI)
crs_clean <- st_crs(32617)
aoi_bbox <- st_as_sfc(st_bbox(c(xmin = -80.113, xmax = -80.078, ymin = 26.058, ymax = 26.112), crs = 4326))
aoi_bbox_utm <- st_transform(aoi_bbox, crs = crs_clean)
# 2. Reproject and filter
polygons_proj <- st_transform(polygons_clean, crs = crs_clean)
sand_polygons <- polygons_proj %>%
filter(Type == "Sand") %>%
st_intersection(aoi_bbox_utm)
non_sand_polygons <- polygons_proj %>%
filter(Type != "Sand") %>%
st_intersection(aoi_bbox_utm)
# 3. Clean geometries using WKT rebuild
sand_geom_clean <- st_sfc(
lapply(st_as_text(st_geometry(sand_polygons)), function(wkt) st_as_sfc(wkt)[[1]]),
crs = crs_clean
)
nonsand_union_clean <- st_as_sfc(st_as_text(st_union(st_geometry(non_sand_polygons))), crs = crs_clean)[[1]]
# 4. Subtract all non-sand from sand polygons
cut_geoms <- lapply(seq_along(sand_geom_clean), function(i) {
g <- sand_geom_clean[[i]]
g_fixed <- st_buffer(g, 0)
tryCatch(
st_difference(g_fixed, nonsand_union_clean),
error = function(e) {
message("Skipping geometry ", i, " due to error: ", e$message)
st_geometrycollection()
}
)
})
# 5. Rebuild sand_cut sf object and drop empty geometries
sand_cut <- sand_polygons
st_geometry(sand_cut) <- st_sfc(cut_geoms, crs = crs_clean)
sand_cut <- sand_cut[!st_is_empty(sand_cut), ]
# 6. Combine everything
combined <- bind_rows(sand_cut, non_sand_polygons)
# 7. Reproject to WGS84 for plotting
polygons_final <- st_transform(combined, crs = 4326)
######## MERGE WITH SURVEY SITES
# --- STEP 3: Prepare Site Coordinate Data ---
# Get all site coordinates, and assign north and south
# Combine site metadata
allsitemd <- bind_rows(.id = "dataset",
nsu11_esa = nsu11_esa_sitemd,
dca17_esa = dca17_esa_sitemd,
dca17 = dca17_sitemd,
tt21 = tt21_sitemd,
tt23 = tt23_sitemd,
drm24 = drm24_sitemd
) %>%
mutate(dir = if_else(latitude > 26.093570, "N", "S"))
points <- st_as_sf(allsitemd, coords = c("longitude", "latitude"), crs = 4326)
# --- STEP 4: Validate Geometry and Match CRS ---
polygons_final <- polygons_final %>%
st_zm(drop = TRUE, what = "ZM") %>%
st_make_valid() %>%
st_transform(st_crs(points))
sf_use_s2(FALSE) # prevent s2 geometry issues
# --- STEP 5: Spatial Join ---
joined <- st_join(points, polygons_final, join = st_within)
joined_df <- joined %>%
mutate(longitude = st_coordinates(.)[,1],
latitude = st_coordinates(.)[,2]) %>%
st_drop_geometry()
allsitemd <- joined_df
##############
# Not needed anymore because removed overlapping sand polygons VVV
# # 'Sand' overlaps some of the other polygons instead of just surrounding them...
# # If a point is classified as Sand AND something else, remove Sand...
# multiclass <- allsitemd %>%
# group_by(dataset, site) %>%
# filter(n() > 1) %>%
# arrange(site)
#
# allsitemd_clean <- allsitemd %>%
# group_by(dataset, site) %>%
# filter(!(Type == "Sand" & n() > 1)) %>%
# ungroup()
##############
# Add factor if survey was ESA coral species only
allsitemd <- allsitemd %>%
mutate(ESAonly = if_else(dataset %in% c("nsu11_esa", "dca17_esa"), "ESA only", "All corals"))
# Visualize habitat classifications
polyplot <- polygons_final %>%
#filter(Type != "Sand") %>%
ggplot() +
geom_sf(aes(fill = Type), color = NA, size = 0.1, alpha = 0.5) +
scale_fill_brewer(palette = "Set3", na.value = "gray80") +
theme_minimal(base_size = 8) +
labs(fill = "Habitat type", shape = "Survey", x = "", y = "") +
theme(legend.position = "right") +
coord_sf(
xlim = c(-80.113, -80.078), # Adjusted to reduce whitespace
ylim = c(26.058, 26.112),
expand = FALSE
) +
scale_y_continuous(labels = scales::number_format(accuracy = 0.01)) +
scale_x_continuous(breaks = seq(-80.11, -80.08, by = 0.01),
labels = scales::number_format(accuracy = 0.01)) # Fewer long ticks
# Plot all surveyed sites
fig1 <- polyplot +
geom_point(data = allsitemd,
aes(x = longitude, y = latitude, shape = dataset),
inherit.aes = FALSE, alpha = 0.3, size = 0.5) +
facet_wrap(~ESAonly, labeller = as_labeller(c(
"All corals" = "A. Surveys of all corals",
"ESA only" = "B. Surveys of ESA-listed corals"
))) +
scale_shape_manual(values = c(2, 3, 4, 5, 6, 7)) +
theme(
strip.text = element_text(hjust = 0, face = "bold", size = 9),#, margin = margin(l = -10)),
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
legend.text = element_text(size = 7),
legend.title = element_text(size = 8),
legend.key.size = unit(0.35, "cm"), # size of legend boxes
legend.spacing.y = unit(0.2, "cm"), # vertical spacing between legend items
legend.margin = margin(t = 2, r = 2, b = 2, l = 2),
legend.box.margin = margin(0, 0, 0, 0)
)
fig1
ggsave(plot = fig1, filename = "outputs/Fig1.png", width = 170, height = 100, units = "mm", dpi = 300)
# Impact zone Scenario 2 and NMFS consideration areas
# Define the extraction function once
extract_placemark_info <- function(kml_file, source_label) {
ns <- c(kml = "http://www.opengis.net/kml/2.2")
kml <- read_xml(kml_file)
placemarks <- xml_find_all(kml, ".//kml:Placemark", ns)
map(placemarks, function(pm) {
name <- xml_text(xml_find_first(pm, ".//kml:name", ns))
# Try to find the folder name, fallback to "Top Level"
folder_node <- xml_parent(xml_parent(pm))
folder_name <- xml_text(xml_find_first(folder_node, ".//kml:name", ns))
if (is.na(folder_name) || folder_name == "") {
folder_name <- "Top Level"
}
# Get ALL coordinate rings (outer/inner polygons, multi-geometries)
coord_nodes <- xml_find_all(pm, ".//kml:coordinates", ns)
rings <- lapply(coord_nodes, function(cn) {
coords_text <- xml_text(cn)
coords <- strsplit(trimws(coords_text), "\\s+")[[1]]
coords_mat <- do.call(rbind, lapply(coords, function(x) {
as.numeric(strsplit(x, ",")[[1]][1:2])
}))
if (nrow(coords_mat) >= 3) coords_mat else NULL
})
rings <- compact(rings)
if (length(rings) == 0) return(NULL)
# Create polygon or multipolygon depending on number of rings
poly_geom <- if (length(rings) == 1) {
st_polygon(list(rings[[1]]))
} else {
st_multipolygon(list(rings))
}
tibble(
folder = folder_name,
name = name,
geometry = st_sfc(poly_geom, crs = 4326),
source = source_label
)
}) %>%
compact() %>%
bind_rows() %>%
st_as_sf()
}
# File paths
impact_kml <- "data/Impact_zones_Scenario 2.kml"
nmfs_kml <- "data/Scenario2_NMFSConsiderationAreas.kml"
# Extract and label
impact_zones_sf <- extract_placemark_info(impact_kml, "Impact Zone")
nmfs_sf <- extract_placemark_info(nmfs_kml, "NMFS Consideration Area")
# Combine
all_zones_sf <- bind_rows(impact_zones_sf, nmfs_sf)
# Rename polygons based on zone folder
all_zones_sf <- all_zones_sf %>%
mutate(
name = if_else(
is.na(name) | name == "",
str_remove(str_remove(folder, "^Scenario_2_"), "\\.shp$"),
name
),
scenario = "Scenario 2 + NMFS Consideration Areas"
)
# Create new polygon covering Rest of Monitoring Area, from 1.2 km N to 1.2 km S of the channel
# 1. Extract the 'Channel' polygon
channel_poly <- all_zones_sf %>%
filter(str_detect(tolower(name), "channel"))
# 2. Transform all relevant layers to UTM (meters) for buffering
utm_crs <- 32617 # UTM Zone 17N — suitable for SE Florida
if (st_is_longlat(channel_poly)) {
channel_poly <- st_transform(channel_poly, utm_crs)
polygons_clean_m <- st_transform(polygons_clean, utm_crs)
all_zones_sf_m <- st_transform(all_zones_sf, utm_crs)
} else {
polygons_clean_m <- polygons_clean
all_zones_sf_m <- all_zones_sf
}
# 3. Get bounds of channel (north-south)
bbox_channel <- st_bbox(channel_poly)
# 4. Define rectangular box: full E-W extent of reef habitat, ±1.2 km N/S of channel
ew_range <- st_bbox(polygons_clean_m)[c("xmin", "xmax")]
ns_range <- c(bbox_channel["ymin"] - 1200, bbox_channel["ymax"] + 1200)
names(ns_range) <- c("ymin", "ymax")
new_box <- st_as_sfc(st_bbox(c(
xmin = ew_range[[1]],
xmax = ew_range[[2]],
ymin = ns_range[[1]],
ymax = ns_range[[2]]
), crs = st_crs(polygons_clean_m)))
# 5. Clip the box to reef habitat
intersected_box <- st_union(st_intersection(new_box, st_union(polygons_clean_m)))
# 6. Subtract all existing zones (Impact + NMFS) by taking concave hull
# 6. Create a tight, shrink-wrapped outer boundary around all existing zones
existing_zone_hull <- all_zones_sf_m %>%
st_make_valid() %>%
st_union() %>%
st_buffer(20) %>% # Buffer OUTWARD 40 meters to close gaps (adjust as needed)
st_union() %>%
st_buffer(-20) # Buffer INWARD to return to approximate original size
# 7. Subtract existing zones from monitoring area
new_zone_geom <- st_difference(intersected_box, existing_zone_hull)
# 8. Build the new polygon row
new_zone <- st_sf(
folder = "Generated",
name = "Rest of Monitoring Area",
scenario = "Rest of Monitoring Area",
geometry = st_sfc(new_zone_geom),
source = "Impact Zone",
crs = st_crs(polygons_clean_m)
)
# 9. Transform back to match original CRS (likely WGS84)
if (st_crs(new_zone) != st_crs(all_zones_sf)) {
new_zone <- st_transform(new_zone, st_crs(all_zones_sf))
}
# 10. Add the new zone to the combined zones object
all_zones_sf <- bind_rows(all_zones_sf, new_zone)
zone_colors <- RColorBrewer::brewer.pal(12, "Paired") # Or use "Set3", "Set1", "Dark2", etc.
names(zone_colors) <- unique(all_zones_sf$name) # Make sure order matches levels
ggplot() +
# Habitat polygons (no legend)
geom_sf(data = polygons_clean, aes(fill = Type),
color = NA, size = 0.1, alpha = 0.25, show.legend = FALSE) +
scale_fill_brewer(palette = "Set3", na.value = "gray80") +
ggnewscale::new_scale_fill() +
# Zones (with 12 manual colors and legend)
geom_sf(data = all_zones_sf, aes(fill = name),
color = NA, linewidth = 0.5, alpha = 0.6) +
scale_fill_manual(values = zone_colors, name = "Zone Type") +
theme_minimal() +
theme(legend.position = "none") +
coord_sf(
xlim = c(-80.113, -80.078),
ylim = c(26.075, 26.11),
expand = FALSE
) +
scale_y_continuous(labels = scales::number_format(accuracy = 0.01)) +
scale_x_continuous(breaks = seq(-80.11, -80.08, by = 0.01),
labels = scales::number_format(accuracy = 0.01))
# Set order of zone levels
zone_levels <- c("Channel", "Side Slopes", "Depth_10cm", "Depth_5cm", "Depth_1cm", "Depth_0_5cm", "Depth_0_1cm",
"Nearshore", "North", "East", "South", "Rest of Monitoring Area")
# Make sure 'Artificial' is changed to 'Nearshore Ridge Complex' in habitat polygons, and that Aggregated Patch Reef is changed to Outer Reef
polygons_filtered <- polygons_final %>%
mutate(Type = if_else(Type == "Artificial", "Nearshore Ridge Complex", Type)) %>%
mutate(Type = if_else(Type == "Aggregated Patch Reef", "Outer Reef", Type)) %>%
filter(Type %in% type_levels)
# Clean any invalid geometry
polygons_filtered <- st_make_valid(polygons_filtered)
all_zones_sf <- st_make_valid(all_zones_sf)
# Intersect with habitat polygons
habitat_in_zones <- st_intersection(polygons_filtered, all_zones_sf)
# Project for accurate area
habitat_in_zones_proj <- habitat_in_zones %>%
st_transform(32617) %>%
mutate(area_m2 = st_area(geometry))
# Summarize area by habitat type and zone name
area_summary <- habitat_in_zones_proj %>%
st_drop_geometry() %>%
mutate(name = factor(name, levels = zone_levels),
Type = factor(Type, levels = type_levels)) %>%
group_by(Type, name) %>%
summarize(total_area_m2 = sum(as.numeric(area_m2)), .groups = "drop") %>%
arrange(name)
# Print total area of each habitat in each impact zone
area_summary %>%
pivot_wider(names_from = name, values_from = total_area_m2, names_sort = FALSE)
## # A tibble: 4 × 13
## Type Channel `Side Slopes` Depth_10cm Depth_5cm Depth_1cm Depth_0_5cm
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Nearshore Ri… 451. 6087. 44168. 45276. 219447. 149738.
## 2 Inner Reef 517. 494. 12133. 11048. 49698. 28911.
## 3 Middle Reef 29875. 1686. 9981. 9151. 40886. 27270.
## 4 Outer Reef 54911. 790. 10672. 11274. 44370. 29690.
## # ℹ 6 more variables: Depth_0_1cm <dbl>, Nearshore <dbl>, North <dbl>,
## # East <dbl>, South <dbl>, `Rest of Monitoring Area` <dbl>
# Combine for full Scenario2+NMFS, and Rest of Monitoring Area
area_summary2 <- habitat_in_zones_proj %>%
st_drop_geometry() %>%
mutate(Type = factor(Type, levels = type_levels)) %>%
group_by(Type, scenario) %>%
summarize(total_area_m2 = sum(as.numeric(area_m2)), .groups = "drop")
area_summary2 %>%
pivot_wider(names_from = scenario, values_from = total_area_m2)
## # A tibble: 4 × 3
## Type `Rest of Monitoring Area` Scenario 2 + NMFS Consider…¹
## <fct> <dbl> <dbl>
## 1 Nearshore Ridge Complex 1233823. 911910.
## 2 Inner Reef 408458. 212905.
## 3 Middle Reef 264250. 169818.
## 4 Outer Reef 441476. 345958.
## # ℹ abbreviated name: ¹`Scenario 2 + NMFS Consideration Areas`
# Combine all aggregated count data
df <- bind_rows(.id = "dataset",
nsu11_esa = nsu11_esa_counts_ag,
dca17_esa = dca17_esa_counts_ag_full,
dca17 = dca17_counts_ag,
tt21 = tt21_counts_ag,
tt23 = tt23_counts_ag,
drm24 = drm24_counts_agg # use data aggregated to 1 pseudo-transect - already has transect_area_m2
)
# Add transect area information for other datasets
## Site-specific areas for tt21 (since there was sand in transects)
tt21_areas <- tt21_m_nosand %>%
mutate(dataset = "tt21") %>%
distinct(dataset, site, transect_area_m2 = area_m2)
df <- df %>%
left_join(tt21_areas, by = c("dataset", "site")) %>%
mutate(
transect_area_m2 = coalesce(transect_area_m2.x, transect_area_m2.y)
) %>%
select(-transect_area_m2.x, -transect_area_m2.y)
## Other datasets had fixed transect areas
df <- df %>%
mutate(transect_area_m2 = case_when(
# Count-specific areas for nsu11 (since they did tier 1 surveys and tier 2 only if n > 5 for a taxon)
dataset == "nsu11_esa" & taxon == "ACER" & n >= 5 ~ 600,
dataset == "nsu11_esa" & taxon == "ACER" & n < 5 ~ 3600,
dataset == "nsu11_esa" & taxon != "ACER" ~ 600,
dataset == "dca17_esa" ~ 784,
dataset == "dca17" ~ 30, # DCA belt transects were 30m
dataset == "tt23" ~ 20, # TT23 belt transects were 20m
TRUE ~ transect_area_m2))
# Remove surveys with transect_area_m2 == 0 (a couple of TT21 surveys were all sand)
df <- df %>% filter(transect_area_m2 > 0)
# Select sites in Nearshore Ridge Complex, Inner Reef, and Middle Reef, Outer Reef and Aggregated Patch Reef
selected <- allsitemd %>%
filter(Type %in% c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef",
"Artificial", "Outer Reef", "Aggregated Patch Reef"))
# Plot all selected sites
polyplot +
geom_point(data = selected,
aes(x = longitude, y = latitude, shape = dataset),
inherit.aes = FALSE, alpha = 0.6) +
scale_shape_manual(values = c(2, 3, 4, 5, 6, 7)) +
facet_wrap(~ESAonly) +
labs(title = "Selected sites - all surveys")
# Filter dataset to just selected sites
dff <- df %>% inner_join(selected)
Shedd dataset has some sites in the NRC south that are further from the channel relative to other datasets. Are these similar enough to the NRC south sites that are closer to the channel that they can be reasonably included?
First, look at total coral density between the nearer vs. farther sites
drm24_NRCS <- drm24_counts_ag %>%
left_join(allsitemd) %>%
filter(Type == "Nearshore Ridge Complex", dir == "S")
drm24_NRCS <- drm24_NRCS %>%
mutate(grp = if_else(latitude > 26.08, "near", "far"))
# Fit a Negative Binomial GLM
mod_nb <- MASS::glm.nb(n ~ grp, data = drm24_NRCS)
# Generate new data only for existing taxon-size class combinations
newdata_1 <- drm24_NRCS %>%
distinct(grp)
# Get predicted values & standard errors (log scale)
preds_nb <- predict(mod_nb, newdata_1, type = "link", se.fit = TRUE)
# Compute total coral density
results_nb <- newdata_1 %>%
mutate(
fit = exp(preds_nb$fit), # Convert fitted values to response scale
fit_se = exp(preds_nb$fit) * preds_nb$se.fit, # Convert SE using the Delta Method
fit_var = (fit * preds_nb$se.fit)^2, # Variance propagation
fit_lower = exp(preds_nb$fit - 1.96 * preds_nb$se.fit), # Lower CI
fit_upper = exp(preds_nb$fit + 1.96 * preds_nb$se.fit) # Upper CI
)
# Compute total coral density + confidence intervals
total_ci_nb <- results_nb %>%
group_by(grp) %>%
summarize(
total_density = sum(fit),
total_se = sqrt(sum(fit_var)),
lower_95CI = exp(log(total_density) - 1.96 * (total_se / total_density)),
upper_95CI = exp(log(total_density) + 1.96 * (total_se / total_density))
)
knitr::kable(total_ci_nb)
| grp | total_density | total_se | lower_95CI | upper_95CI |
|---|---|---|---|---|
| far | 0.7357724 | 0.1473072 | 0.4969622 | 1.0893403 |
| near | 0.4922395 | 0.0740145 | 0.3665938 | 0.6609488 |
Next, look at community composition between the nearer vs. farther sites
# 1. Create a wide community matrix: rows = site x grp, columns = taxa
comm_matrix <- drm24_NRCS %>%
group_by(site, grp, taxon) %>%
summarize(total_n = sum(n), .groups = "drop") %>%
pivot_wider(names_from = taxon, values_from = total_n, values_fill = 0)
# 2. Extract community matrix and metadata
comm_data <- comm_matrix %>% select(-site, -grp)
site_info <- comm_matrix %>% select(site, grp)
# 3. Run NMDS (k = 2 dimensions is standard)
set.seed(123) # for reproducibility
nmds <- metaMDS(comm_data, distance = "bray", k = 2, trymax = 100)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1988349
## Run 1 stress 0.2356599
## Run 2 stress 0.1884537
## ... New best solution
## ... Procrustes: rmse 0.1300637 max resid 0.3940418
## Run 3 stress 0.2288954
## Run 4 stress 0.1884538
## ... Procrustes: rmse 9.825042e-05 max resid 0.0002200681
## ... Similar to previous best
## Run 5 stress 0.2068049
## Run 6 stress 0.2071272
## Run 7 stress 0.2387491
## Run 8 stress 0.1989884
## Run 9 stress 0.2176251
## Run 10 stress 0.2667308
## Run 11 stress 0.22334
## Run 12 stress 0.2004841
## Run 13 stress 0.2264123
## Run 14 stress 0.205021
## Run 15 stress 0.2105966
## Run 16 stress 0.2304369
## Run 17 stress 0.2068049
## Run 18 stress 0.198835
## Run 19 stress 0.1884537
## ... New best solution
## ... Procrustes: rmse 0.0001556115 max resid 0.0003352602
## ... Similar to previous best
## Run 20 stress 0.1919049
## *** Best solution repeated 1 times
# 4. Prepare data for plotting
scores_df <- scores(nmds)$sites %>%
bind_cols(site_info)
# 5. Plot NMDS with ggplot2
ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = grp)) +
geom_point(size = 3, alpha = 0.8) +
theme_minimal() +
labs(title = "NMDS of Coral Community Structure\nby Dist from Channel in NRC South",
color = "Dist from channel")
Density and community composition are not different in the NRC S sites that are nearer vs. farther from the channel. So, no need to exclude the farther sites.
DCA dataset has the most sites in the “Artificial” habitat classification. (Some in other datasets too, but DCA has most). Can ‘Artificial’ be aggregated with ‘Nearshore Ridge Complex’?
‘Artificial’ is only present in DCA and minorly in TT – but absent
from Shedd.
It is in close spatial proximity to Nearshore Ridge Complex – can these
be combined?
Test for differences in coral density in DCA survey between habitat types
# Subset DCA data
dcadf <- dca17_counts_ag %>%
left_join(allsitemd) %>%
filter(Type %in% c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef", "Artificial", "Outer Reef", "Aggregated Patch Reef"))
# Fit a Negative Binomial GLM
mod_nb <- MASS::glm.nb(n ~ Type, data = dcadf)
# Generate new data only for existing taxon-size class combinations
newdata_1 <- dcadf %>%
distinct(Type)
# Get predicted values & standard errors (log scale)
preds_nb <- predict(mod_nb, newdata_1, type = "link", se.fit = TRUE)
# Compute both total coral density & taxon-size class-specific densities in one step
results_nb <- newdata_1 %>%
mutate(
fit = exp(preds_nb$fit), # Convert fitted values to response scale
fit_se = exp(preds_nb$fit) * preds_nb$se.fit, # Convert SE using the Delta Method
fit_var = (fit * preds_nb$se.fit)^2, # Variance propagation
fit_lower = exp(preds_nb$fit - 1.96 * preds_nb$se.fit), # Lower CI
fit_upper = exp(preds_nb$fit + 1.96 * preds_nb$se.fit) # Upper CI
)
# Compute total coral density + confidence intervals
total_ci_nb <- results_nb %>%
group_by(Type) %>%
summarize(
total_density = sum(fit),
total_se = sqrt(sum(fit_var)),
lower_95CI = exp(log(total_density) - 1.96 * (total_se / total_density)),
upper_95CI = exp(log(total_density) + 1.96 * (total_se / total_density))
)
knitr::kable(total_ci_nb)
| Type | total_density | total_se | lower_95CI | upper_95CI |
|---|---|---|---|---|
| Aggregated Patch Reef | 1.3971429 | 0.1634482 | 1.1108594 | 1.757205 |
| Artificial | 1.5571429 | 0.1814656 | 1.2391666 | 1.956713 |
| Inner Reef | 1.2116883 | 0.1359261 | 0.9725281 | 1.509662 |
| Middle Reef | 0.7593407 | 0.0803194 | 0.6171618 | 0.934274 |
| Nearshore Ridge Complex | 1.7598639 | 0.1409740 | 1.5041540 | 2.059045 |
| Outer Reef | 1.4123077 | 0.0916120 | 1.2436939 | 1.603781 |
Highly overlapping confidence intervals for Artifical and Nearshore Ridge Complex, so no difference in coral density.
Test for differences in community composition in DCA survey
library(vegan)
# 1. Create a wide community matrix: rows = site x Type, columns = taxa
comm_matrix <- dcadf %>%
group_by(site, Type, taxon) %>%
summarize(total_n = sum(n), .groups = "drop") %>%
pivot_wider(names_from = taxon, values_from = total_n, values_fill = 0)
# 2. Extract community matrix and metadata
comm_data <- comm_matrix %>% select(-site, -Type)
site_info <- comm_matrix %>% select(site, Type)
# 3. Run NMDS (k = 2 dimensions is standard)
set.seed(123) # for reproducibility
nmds <- metaMDS(comm_data, distance = "bray", k = 2, trymax = 100)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2309134
## Run 1 stress 0.2326952
## Run 2 stress 0.2368361
## Run 3 stress 0.2338776
## Run 4 stress 0.2335654
## Run 5 stress 0.2309072
## ... New best solution
## ... Procrustes: rmse 0.0007007125 max resid 0.006922102
## ... Similar to previous best
## Run 6 stress 0.2389423
## Run 7 stress 0.2432639
## Run 8 stress 0.2384586
## Run 9 stress 0.2406445
## Run 10 stress 0.2321453
## Run 11 stress 0.2326945
## Run 12 stress 0.2339784
## Run 13 stress 0.2309573
## ... Procrustes: rmse 0.003585168 max resid 0.04151555
## Run 14 stress 0.2371307
## Run 15 stress 0.2413286
## Run 16 stress 0.2345349
## Run 17 stress 0.2389431
## Run 18 stress 0.2418034
## Run 19 stress 0.2333151
## Run 20 stress 0.2363065
## *** Best solution repeated 1 times
# 4. Prepare data for plotting
scores_df <- scores(nmds)$sites %>%
bind_cols(site_info)
# 5. Plot NMDS with ggplot2
ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = Type)) +
geom_point(size = 3, alpha = 0.8) +
theme_minimal() +
labs(title = "NMDS of Coral Community Structure by Habitat Type",
color = "Habitat Type")
High degree of similarity between Artificial and Nearshore Ridge Complex coral communities.
Combine ‘Artificial’ with ‘Nearshore Ridge Complex’
# Based on these results, combine "Artificial" with "Nearshore Ridge Complex"
dff[dff$Type == "Artificial", "Type"] <- "Nearshore Ridge Complex"
Combine ‘Aggregated Patch Reef’ with ‘Outer Reef’, following previous studies (DCA)
dff[dff$Type == "Aggregated Patch Reef", "Type"] <- "Outer Reef"
Remove lowest abundance coral taxa
These will be problematic for statistical models.
# Drop taxa with very few observations
sppcounts <- dff %>%
group_by(taxon) %>%
summarize(total = sum(n), .groups = "drop") %>%
arrange(total)
sppcounts
## # A tibble: 27 × 2
## taxon total
## <chr> <dbl>
## 1 MANG 0
## 2 MARE 0
## 3 FFRA 1
## 4 HCUC 1
## 5 PAME 1
## 6 SCOL 2
## 7 OCUL 3
## 8 CNAT 5
## 9 DLAB 18
## 10 PCLI 27
## # ℹ 17 more rows
dff <- dff %>%
filter(taxon %in% filter(sppcounts, total >= 5)$taxon) %>%
mutate(Type = factor(Type, levels = type_levels))
Taxa with less than 5 total observations were filtered out, which included: HCUC, MANG, PAME, FFRA, OCUL, SCOL
# Count number of sites per habitat
nsites <- dff %>%
group_by(dataset, Type) %>%
summarize(n = n_distinct(site)) %>%
mutate(Type = factor(Type, levels = type_levels)) %>%
arrange(Type) %>%
pivot_wider(names_from = Type, values_from = n, values_fill = 0) %>%
janitor::adorn_totals(where = "col", name = "Total Sites")
# Get area surveyed per transect and total area surveyed
area_per_site <- dff %>%
group_by(dataset) %>%
summarise(
area_per_site = {
r <- range(transect_area_m2)
if (r[1] == r[2]) as.character(r[1]) else paste(r, collapse = " — ")})
total_area <- dff %>%
group_by(dataset, site) %>%
summarize(max_area = max(transect_area_m2)) %>%
summarize(total_area = sum(max_area))
# Get total number of corals counted
total_corals <- dff %>%
group_by(dataset) %>%
summarize(total_n = sum(n))
survey_summary <- reduce(
list(nsites, area_per_site, total_area, total_corals),
full_join,
by = "dataset"
) %>%
mutate(dataset = factor(dataset, levels = dataset_levels)) %>%
arrange(dataset) %>%
janitor::adorn_totals(where = "row")
write_csv(survey_summary, file = "outputs/Table1.csv")
survey_summary %>% knitr::kable()
| dataset | Nearshore Ridge Complex | Inner Reef | Middle Reef | Outer Reef | Total Sites | area_per_site | total_area | total_n |
|---|---|---|---|---|---|---|---|---|
| nsu11_esa | 85 | 94 | 83 | 111 | 373 | 600 — 3600 | 1255800.0 | 11871 |
| dca17_esa | 79 | 17 | 21 | 25 | 142 | 784 | 111328.0 | 3563 |
| dca17 | 62 | 22 | 26 | 85 | 195 | 30 | 5850.0 | 9489 |
| tt21 | 7 | 6 | 5 | 17 | 35 | 3.9 — 30 | 928.2 | 2172 |
| tt23 | 46 | 13 | 24 | 35 | 118 | 20 | 2360.0 | 4388 |
| drm24 | 25 | 15 | 6 | 0 | 46 | 20 — 40 | 1840.0 | 2559 |
| Total | 304 | 167 | 165 | 273 | 909 | - | 1378106.2 | 34042 |
(nsu11_esa_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "nsu11_esa"),
aes(x = longitude, y = latitude), pch = 4, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2011 — NSU ESA Survey"))
(dca17_esa_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "dca17_esa"),
aes(x = longitude, y = latitude), pch = 4, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2017 — DCA ESA Survey"))
(dca_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "dca17"),
aes(x = longitude, y = latitude), pch = 4, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2017 — DCA RECON Survey"))
(tt21_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "tt21"),
aes(x = longitude, y = latitude), pch = 4, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2021 — Tetra Tech Survey"))
(tt23_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "tt23"),
aes(x = longitude, y = latitude), pch = 4, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2023 — Tetra Tech Survey"))
(drm24_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "drm24"),
aes(x = longitude, y = latitude), pch = 4, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2024 — Shedd Survey"))
Predictors: dataset, habitat Type, direction from channel, taxon, size class
# Just get columns we need for modeling
dfftaxon <- dff %>% select(dataset, Type, dir, site, transect_area_m2, taxon, class, n)
# MODEL - FIT ON ALL DATA INCLUDING ALL STRUCTURAL ZEROS
# mod_nb <- brm(
# bf(n ~ dataset * taxon:class * Type + taxon:class:Type:dir +
# offset(log(transect_area_m2)),
# zi ~ taxon * Type),
# family = zero_inflated_negbinomial(),
# data = dfftaxon, # choose dataset with or without all structural zeros
# prior = c(prior(normal(0, 2), class = "b"), # Weak prior on coefficients
# prior(normal(0, 5), class = "Intercept"), # Weak prior on intercept
# prior(exponential(1), class = "shape"), # Reasonable prior for NB dispersion
# prior(normal(0, 1), class = "b", dpar = "zi")),
# chains = 4,
# cores = 4,
# threads = threading(5),
# iter = 200, warmup = 50, #iter = 1000, warmup = 500,
# thin = 2,
# control = list(adapt_delta = 0.85, max_treedepth = 10),
# backend = "cmdstanr"
# )
# saveRDS(mod_nb, file = "data/processed/mod_nb.rds")
mod_nb <- readRDS("data/processed/mod_nb.rds")
# 1. Create newdata grid (1 m² for standardization)
# Remove any taxa not observed in a given dataset / habitat Type ?????
dfftaxon_trimmed <- dfftaxon %>%
group_by(dataset, Type, dir, taxon, class) %>%
filter(any(n > 0)) %>%
ungroup()
newdata <- dfftaxon_trimmed %>% # USE DATA WITHOUT THE UNOBSERVED CORALS (DATASET/TYPE/DIR/TAXON/CLASS)
distinct(dataset, Type, dir, taxon, class) %>%
mutate(transect_area_m2 = 1)
# 2. Get fitted values manually by computing summary statistics across all draws
posterior_draws <- add_epred_draws(mod_nb, newdata = newdata,
allow_new_levels = TRUE, re_formula = NA) %>%
mutate(Type = factor(Type, levels = type_levels),
dataset = factor(dataset, levels = dataset_levels))
fitted_taxon <- posterior_draws %>%
# Sum across size classes within taxa
group_by(dataset, Type, dir, taxon, .draw) %>%
summarise(
epred_sum = sum(.epred), # 👈 sum size classes
.groups = "drop"
) %>%
# Average across draws
group_by(dataset, Type, dir, taxon) %>%
summarise(
fit_mean = mean(epred_sum),
fit_sd = sd(epred_sum),
fit_lower = quantile(epred_sum, 0.025),
fit_upper = quantile(epred_sum, 0.975),
.groups = "drop"
)
# Plot
ggplot(fitted_taxon, aes(y = fit_mean, x = Type, color = dir, shape = dataset,
group = interaction(dataset, dir))) +
facet_wrap(taxon ~ .) +
geom_point(position = position_dodge(width = 0.5)) +
geom_line(position = position_dodge(width = 0.5), alpha = 0.5) +
geom_errorbar(aes(ymin = fit_lower, ymax = fit_upper), width = 0,
position = position_dodge(width = 0.5), lwd = 0.25) +
scale_y_log10()+#limits = c(1e-4, 2)) +
scale_x_discrete(labels = type_labels) +
labs(x = "Habitat Type", y = "Density (per m2)")
# Any N-S differences in taxon abundance within habitat Types/datasets?
#### Pivot wide so you can calculate N vs S difference per draw
taxon_NS_diff <- posterior_draws %>%
ungroup() %>%
select(.draw, dataset, Type, dir, taxon, class, .epred) %>%
pivot_wider(names_from = dir, values_from = .epred) %>%
filter(!is.na(N), !is.na(S)) %>% # ensure both directions exist for the draw
mutate(diff = S - N) # or N - S depending on interpretation
taxon_NS_summ <- taxon_NS_diff %>%
group_by(dataset, Type, taxon, class) %>%
summarize(
mean_diff = mean(diff),
lower_95 = quantile(diff, 0.025),
upper_95 = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)),
.groups = "drop"
)
#table(p.adjust(taxon_NS_summ$p_two_sided, method = "holm") < 0.05)
# taxon_NS_summ[19,]
taxon_NS_summ %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni")) %>%
filter(p_adj < 0.05) %>%
mutate(greater = if_else(mean_diff > 0, "N", "S")) %>%
group_by(dataset, Type, greater) %>%
summarize(taxa = paste(taxon, class, collapse = ",")) %>%
arrange(Type, greater) %>%
knitr::kable()
| dataset | Type | greater | taxa |
|---|---|---|---|
| dca17 | Nearshore Ridge Complex | N | MEAN <4cm,SINT >4cm |
| tt21 | Nearshore Ridge Complex | N | SINT >4cm |
| tt23 | Nearshore Ridge Complex | N | MEAN <4cm,SINT >4cm |
| drm24 | Nearshore Ridge Complex | N | SINT >4cm |
| dca17 | Nearshore Ridge Complex | S | SOLE >4cm |
| tt23 | Nearshore Ridge Complex | S | SOLE >4cm |
| drm24 | Nearshore Ridge Complex | S | SOLE >4cm |
| tt23 | Middle Reef | N | AGAR >4cm |
| dca17 | Outer Reef | N | DSTO >4cm,PORI <4cm,SINT <4cm |
| tt21 | Outer Reef | N | DSTO >4cm,PORI <4cm,SINT <4cm |
| tt23 | Outer Reef | N | DSTO >4cm,PORI <4cm,SINT <4cm |
taxon_NS_summ %>% filter(dataset == "dca17", Type == "Nearshore Ridge Complex", taxon == "MCAV", class == "<4cm")
## # A tibble: 1 × 8
## dataset Type taxon class mean_diff lower_95 upper_95 p_two_sided
## <fct> <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 dca17 Nearshore Ridge C… MCAV <4cm 0.0337 0.00483 0.0603 0.02
A minority of taxon × size class × habitat × dataset combinations show a significant north-south effect. The majority do not. they are dataset/habitat/taxon/size class-specific, and idiosyncratic. My goal is not to focus on dir (direction) in the main analyses, but I want to account for it appropriately so it doesn’t confound other effects. Let’s move direction to a random effect. This approach acknowledges the variability without overfitting. It’s flexible and allows for occasional strong differences without forcing them to be estimated everywhere.
((When I changed FAVI adults to individual taxa and the formula in the first model to taxon:class in the fixed effects instead of taxon * class, the number of N-S differences dropped by a lot. probably because multiple comparisons numbers go up because more taxa.))
# mod_nb2 <- brm(
# bf(n ~ dataset * taxon:class * Type +
# (1 | taxon:class:Type:dir) +
# offset(log(transect_area_m2)),
# zi ~ taxon * Type),
# family = zero_inflated_negbinomial(),
# data = dfftaxon, # choose dataset with or without all structural zeros
# prior = c(prior(normal(0, 2), class = "b"), # Weak prior on coefficients
# prior(normal(0, 5), class = "Intercept"), # Weak prior on intercept
# prior(exponential(1), class = "shape"), # Reasonable prior for NB dispersion
# prior(normal(0, 1), class = "b", dpar = "zi")),
# chains = 4,
# cores = 4,
# threads = threading(5),
# iter = 200, warmup = 50, #iter = 1000, warmup = 500,
# thin = 2,
# control = list(adapt_delta = 0.85, max_treedepth = 10),
# backend = "cmdstanr"
# )
# saveRDS(mod_nb2, file = "data/processed/mod_nb2.rds")
mod_nb2 <- readRDS("data/processed/mod_nb2.rds")
loo_1 <- loo(mod_nb)
loo_2 <- loo(mod_nb2)
loo_compare(loo_1, loo_2)
# means second model with dir as random effect is better
newdata2 <- dfftaxon_trimmed %>% # USE DATA WITHOUT THE UNOBSERVED CORALS (DATASET/TYPE/DIR/TAXON/CLASS)
distinct(dataset, Type, taxon, class) %>%
mutate(transect_area_m2 = 1)
# 2. Get fitted values manually by computing summary statistics across all draws
posterior_draws2 <- add_epred_draws(mod_nb2, newdata = newdata2,
allow_new_levels = TRUE, re_formula = NA) %>%
mutate(Type = factor(Type, levels = type_levels),
dataset = factor(dataset, levels = dataset_levels))
fitted_taxon2 <- posterior_draws2 %>%
group_by(dataset, Type, taxon, class) %>%
summarise(
fit_mean = mean(.epred),
fit_sd = sd(.epred),
fit_lower = quantile(.epred, 0.025),
fit_upper = quantile(.epred, 0.975),
.groups = "drop"
)
# Plot
ggplot(fitted_taxon2, aes(y = fit_mean, x = Type, shape = dataset,
group = interaction(dataset, class), color = class)) +
facet_wrap(taxon ~ .) +
geom_point(position = position_dodge(width = 0.5)) +
geom_line(position = position_dodge(width = 0.5), alpha = 0.5) +
geom_errorbar(aes(ymin = fit_lower, ymax = fit_upper), width = 0,
position = position_dodge(width = 0.5), lwd = 0.25) +
scale_y_log10()+#limits = c(1e-4, 2)) +
scale_x_discrete(labels = type_labels) +
labs(x = "Habitat Type", y = "Density (per m2)")
# Ensure dataset is character so we can do < comparison
draws_clean <- posterior_draws2 %>%
mutate(dataset = as.character(dataset))
# One self-join to get all unique dataset pairs
pairwise_contrasts <- draws_clean %>%
rename(dataset1 = dataset, epred1 = .epred) %>%
inner_join(
draws_clean %>% rename(dataset2 = dataset, epred2 = .epred),
by = c(".draw", "taxon", "class", "Type")
) %>%
filter(dataset1 < dataset2) %>% # Avoid duplicates and self-pairs
mutate(diff = epred1 - epred2)
# Step: summarize the posterior differences
summary_contrasts <- pairwise_contrasts %>%
group_by(taxon, class, Type, dataset1, dataset2) %>%
summarize(mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)),
.groups = "drop")
summary_contrasts %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni")) %>%
filter(p_adj < 0.01) %>%
arrange(taxon, Type) %>%
knitr::kable()
| taxon | class | Type | dataset1 | dataset2 | mean_diff | lower | upper | p_two_sided | p_adj |
|---|---|---|---|---|---|---|---|---|---|
| AGAR | >4cm | Middle Reef | dca17 | tt23 | -0.0206681 | -0.0475759 | -0.0055214 | 0 | 0 |
| DSTO | >4cm | Nearshore Ridge Complex | dca17 | nsu11_esa | -0.0273275 | -0.0451498 | -0.0133318 | 0 | 0 |
| DSTO | >4cm | Nearshore Ridge Complex | drm24 | nsu11_esa | -0.0278040 | -0.0470381 | -0.0125370 | 0 | 0 |
| EFAS | >4cm | Inner Reef | dca17 | tt23 | -0.0177708 | -0.0466591 | -0.0033855 | 0 | 0 |
| FAVI | <4cm | Nearshore Ridge Complex | dca17 | tt23 | 0.0082973 | 0.0022681 | 0.0155915 | 0 | 0 |
| MADR | <4cm | Outer Reef | dca17 | tt23 | -0.0264241 | -0.0486699 | -0.0111148 | 0 | 0 |
| MADR | <4cm | Outer Reef | tt21 | tt23 | -0.0237053 | -0.0455274 | -0.0079857 | 0 | 0 |
| MADR | >4cm | Outer Reef | tt21 | tt23 | 0.0856729 | 0.0315912 | 0.1556149 | 0 | 0 |
| MCAV | <4cm | Nearshore Ridge Complex | dca17 | drm24 | 0.0356163 | 0.0127867 | 0.0674299 | 0 | 0 |
| MCAV | <4cm | Nearshore Ridge Complex | drm24 | tt23 | -0.0833138 | -0.1556538 | -0.0420177 | 0 | 0 |
| MCAV | >4cm | Inner Reef | dca17 | tt23 | -0.1422167 | -0.2963079 | -0.0392733 | 0 | 0 |
| MEAN | <4cm | Nearshore Ridge Complex | dca17 | drm24 | 0.0277780 | 0.0139281 | 0.0473012 | 0 | 0 |
| MEAN | <4cm | Nearshore Ridge Complex | drm24 | tt23 | -0.0117991 | -0.0243294 | -0.0032560 | 0 | 0 |
| ORBI | >4cm | Nearshore Ridge Complex | dca17_esa | nsu11_esa | -0.0027144 | -0.0047212 | -0.0014143 | 0 | 0 |
| ORBI | >4cm | Inner Reef | dca17_esa | nsu11_esa | -0.0092829 | -0.0145948 | -0.0047959 | 0 | 0 |
| ORBI | >4cm | Inner Reef | dca17_esa | tt21 | -0.0563671 | -0.1663872 | -0.0128617 | 0 | 0 |
| ORBI | >4cm | Inner Reef | nsu11_esa | tt21 | -0.0470842 | -0.1584491 | -0.0049816 | 0 | 0 |
| ORBI | >4cm | Middle Reef | dca17_esa | nsu11_esa | -0.0391281 | -0.0649235 | -0.0233560 | 0 | 0 |
| ORBI | >4cm | Outer Reef | dca17 | nsu11_esa | -0.0264160 | -0.0420822 | -0.0151089 | 0 | 0 |
| ORBI | >4cm | Outer Reef | dca17_esa | nsu11_esa | -0.0282150 | -0.0442810 | -0.0172957 | 0 | 0 |
| PORI | <4cm | Nearshore Ridge Complex | dca17 | drm24 | -0.1816289 | -0.3209932 | -0.0902685 | 0 | 0 |
| PORI | <4cm | Nearshore Ridge Complex | drm24 | tt23 | 0.1955867 | 0.1007462 | 0.3355481 | 0 | 0 |
| PORI | >4cm | Nearshore Ridge Complex | dca17 | drm24 | -0.1225356 | -0.2483919 | -0.0341416 | 0 | 0 |
| PORI | >4cm | Nearshore Ridge Complex | dca17 | tt23 | 0.0870676 | 0.0518930 | 0.1388792 | 0 | 0 |
| PORI | >4cm | Nearshore Ridge Complex | drm24 | tt23 | 0.2096031 | 0.1157690 | 0.3417321 | 0 | 0 |
| PORI | >4cm | Nearshore Ridge Complex | tt21 | tt23 | 0.1873019 | 0.0815793 | 0.4011568 | 0 | 0 |
| PORI | <4cm | Inner Reef | dca17 | drm24 | -0.0726574 | -0.1316216 | -0.0307625 | 0 | 0 |
| PORI | <4cm | Inner Reef | drm24 | tt23 | 0.0824516 | 0.0386936 | 0.1465790 | 0 | 0 |
| PORI | >4cm | Inner Reef | dca17 | drm24 | -0.2035617 | -0.4168452 | -0.0795764 | 0 | 0 |
| PORI | >4cm | Inner Reef | drm24 | tt23 | 0.2107337 | 0.0790772 | 0.4299514 | 0 | 0 |
| PORI | <4cm | Middle Reef | dca17 | drm24 | -0.0656863 | -0.1812608 | -0.0113614 | 0 | 0 |
| PORI | <4cm | Outer Reef | dca17 | tt21 | -0.1237880 | -0.2385083 | -0.0500171 | 0 | 0 |
| PORI | <4cm | Outer Reef | dca17 | tt23 | -0.2133883 | -0.3257182 | -0.1265684 | 0 | 0 |
| SIDE | <4cm | Nearshore Ridge Complex | dca17 | tt23 | 0.4784576 | 0.1899208 | 0.9081005 | 0 | 0 |
| SIDE | <4cm | Nearshore Ridge Complex | drm24 | tt23 | 1.1409448 | 0.5481313 | 2.0300861 | 0 | 0 |
| SIDE | >4cm | Nearshore Ridge Complex | dca17 | tt23 | 0.0632217 | 0.0184330 | 0.1198096 | 0 | 0 |
| SIDE | >4cm | Nearshore Ridge Complex | tt21 | tt23 | 0.1954759 | 0.0617435 | 0.4641523 | 0 | 0 |
| SIDE | <4cm | Middle Reef | dca17 | drm24 | -0.9133400 | -2.0111107 | -0.4146305 | 0 | 0 |
| SIDE | <4cm | Middle Reef | dca17 | tt21 | -0.3498585 | -0.8657085 | -0.0664937 | 0 | 0 |
| SIDE | <4cm | Middle Reef | dca17 | tt23 | -0.2735147 | -0.4800186 | -0.1291287 | 0 | 0 |
| SIDE | <4cm | Outer Reef | dca17 | tt21 | -0.6767589 | -1.1942836 | -0.3736281 | 0 | 0 |
| SIDE | <4cm | Outer Reef | dca17 | tt23 | -0.7153612 | -1.1712528 | -0.4335441 | 0 | 0 |
| SINT | <4cm | Nearshore Ridge Complex | dca17 | drm24 | -0.0395015 | -0.0891801 | -0.0113968 | 0 | 0 |
| SINT | <4cm | Nearshore Ridge Complex | dca17 | tt23 | -0.1066241 | -0.1866396 | -0.0581286 | 0 | 0 |
| SINT | <4cm | Nearshore Ridge Complex | tt21 | tt23 | -0.1099927 | -0.1877617 | -0.0548921 | 0 | 0 |
| SINT | >4cm | Nearshore Ridge Complex | dca17 | tt23 | -0.0771680 | -0.1362921 | -0.0217434 | 0 | 0 |
| SINT | >4cm | Nearshore Ridge Complex | drm24 | tt23 | -0.1046947 | -0.1704248 | -0.0525528 | 0 | 0 |
| SINT | <4cm | Outer Reef | dca17 | tt23 | -0.0629833 | -0.1284487 | -0.0167352 | 0 | 0 |
| SINT | >4cm | Outer Reef | dca17 | tt23 | 0.1276889 | 0.0549119 | 0.2030887 | 0 | 0 |
| SINT | >4cm | Outer Reef | tt21 | tt23 | 0.1917505 | 0.0744990 | 0.3669390 | 0 | 0 |
| SOLE | <4cm | Nearshore Ridge Complex | dca17 | tt23 | -0.0160397 | -0.0337524 | -0.0071069 | 0 | 0 |
| SOLE | <4cm | Outer Reef | dca17 | tt21 | -0.0163198 | -0.0409416 | -0.0034461 | 0 | 0 |
| SOLE | <4cm | Outer Reef | dca17 | tt23 | -0.0170995 | -0.0337618 | -0.0068944 | 0 | 0 |
Lots of idiosyncratic diffs between surveys for individual taxon x class groups? Reflects diff sites? No super clear patterns.
Right now this does NOT differentiate N and S of the channel… This is because no significant differences based on direction were detected within any habitat in any dataset (except TT21 NRC, but this only had 2 sites N of channel, and they were right on the side of the channel…)
# calc total density per dataset per habitat type, averaging across N and S
# Compute and plot total coral abundance by dataset, habitat Type, and direction from channel
# Step 1: Sum epred across taxon and class within each dataset, habitat Type, and direction
# posterior_dir_totals <- posterior_draws %>%
# group_by(.draw, dataset, Type) %>%
# summarize(dir_total = sum(.epred), .groups = "drop")
# Step 2: Sum across all taxa within habitat Types per dataset
posterior_type_totals <- posterior_draws2 %>%
group_by(.draw, dataset, Type) %>%
summarize(total_epred = sum(.epred), .groups = "drop")
# Step 3: Summarize across posterior draws
total_perType <- posterior_type_totals %>%
group_by(dataset, Type) %>%
summarize(
fit_mean = mean(total_epred),
fit_sd = sd(total_epred),
fit_lower = quantile(total_epred, 0.025),
fit_upper = quantile(total_epred, 0.975),
.groups = "drop"
)
# Compute total abundances within impact zones
totals <- left_join(total_perType, area_summary, by = "Type") %>%
mutate(tot_estimate = fit_mean * total_area_m2,
tot_lower = fit_lower * total_area_m2,
tot_upper = fit_upper * total_area_m2) %>%
mutate(Type = factor(Type, levels = type_levels))
# Step 2: Add missing combinations as NA rows
totals <- totals %>%
complete(dataset, Type, fill = list(tot_estimate = NA, tot_lower = NA, tot_upper = NA)) %>%
mutate(name = factor(name, levels = zone_levels))
# Step 3: Plot
p1 <- ggplot(totals, aes(x = dataset, y = tot_estimate, fill = fct_rev(name))) +
facet_grid(~Type) +
geom_col(position = position_stack()) +
# geom_errorbar(aes(ymin = tot_lower, ymax = tot_upper),
# position = position_dodge(width = 0.9), width = 0.2) +
scale_fill_brewer(palette = "Set3") +
labs(x = "Habitat Type",
y = "Total corals",
title = "Total corals in all impact zones, by habitat") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
p1
# 🎯 Goal
#
# Estimate: density of taxon × class within each habitat Type
# ...averaging over datasets, but not letting datasets with more transects in a given habitat dominate unfairly.
# with zero-inflation that varies per taxon*class
# mod_nb3 <- brm(
# bf(n ~ taxon:class * Type +
# (1 + Type | dataset) +
# (1 | dataset:taxon:class:Type:dir) +
# offset(log(transect_area_m2)),
# zi ~ taxon * Type),
# data = dfftaxon, # use dataset with all structural zeros present
# family = zero_inflated_negbinomial(),
# prior = c(
# prior(normal(0, 2), class = "b"),
# prior(normal(0, 5), class = "Intercept"),
# prior(exponential(1), class = "shape"),
# prior(normal(0, 1), class = "b", dpar = "zi")
# ),
# chains = 4, cores = 4, threads = threading(5),
# iter = 200, warmup = 50, thin = 2,
# backend = "cmdstanr",
# control = list(adapt_delta = 0.95)
# )
#
# saveRDS(mod_nb3, file = "data/processed/mod_nb3.rds")
mod_nb3 <- readRDS("data/processed/mod_nb3.rds")
# 1. Create newdata grid (1 m² for standardization)
newdata <- dfftaxon_trimmed %>%
distinct(Type, taxon, class) %>%
mutate(transect_area_m2 = 1)
# Posterior draws of predicted density (n / m²)
preds <- add_epred_draws(mod_nb3, newdata = newdata, re_formula = NA) # excludes random effects (gives population-level average)
# Average across all draws
summary_preds <- preds %>%
group_by(taxon, class, Type) %>%
mean_qi(.epred, .width = 0.95)
loo_3 <- loo(mod_nb3)
loo_compare(loo_2, loo_3)
# Predictive performance similar between the two models (mod3 slightly better) -- can choose dataset as random (mod3) based on interpretability
# mod3 also has slightly lower p_loo (good, slightly simpler )
###############
# DENSITY OF EACH TAXON x CLASS IN EACH HABITAT TYPE
#### PLOT
ggplot(summary_preds, aes(x = Type, y = .epred, color = class)) +
geom_point() +
geom_errorbar(aes(ymin = .lower, ymax = .upper), width = 0.1) +
geom_line(aes(group = class)) +
facet_wrap(~taxon) +
scale_y_log10() +
scale_x_discrete(labels = type_labels) +
labs(x = "Habitat Type", y = "Density (per m2)")+
theme_minimal()
#### TABLE
# Step 1: Format the data
type_order <- unique(summary_preds$Type) # preserves original order
class_order <- c("<4cm", ">4cm")
summary_preds_fmt <- summary_preds %>%
mutate(
class = factor(class, levels = class_order),
Type = factor(Type, levels = type_order),
mean_ci = case_when(
.epred < 0.001 ~ sprintf("%.1e<br>[%.1e–%.1e]", .epred, .lower, .upper),
TRUE ~ sprintf("%.4f<br>[%.4f–%.4f]", .epred, .lower, .upper)
)
)
# Step 2: Pivot to have one column per habitat Type
table_out <- summary_preds_fmt %>%
select(taxon, class, Type, mean_ci) %>%
pivot_wider(names_from = Type, values_from = mean_ci) %>%
arrange(taxon, class)
# Step 3: Pretty-print the table
knitr::kable(table_out, format = "html", escape = FALSE,
caption = "Density (per m2) of each coral taxon and size class in each habitat type, with 95% C.I.") %>%
kable_styling(font_size = 10)
| taxon | class | Nearshore Ridge Complex | Inner Reef | Middle Reef | Outer Reef |
|---|---|---|---|---|---|
| ACER |
3.4e-04 [8.7e-05–8.6e-04] |
NA | NA | NA | |
| ACER | >4cm |
0.0205 [0.0090–0.0415] |
5.2e-04 [1.0e-04–1.5e-03] |
1.1e-05 [9.3e-07–3.6e-05] |
4.7e-05 [7.8e-06–1.5e-04] |
| AGAR |
0.0012 [0.0003–0.0032] |
0.0029 [0.0006–0.0074] |
0.0023 [0.0002–0.0089] |
0.0064 [0.0019–0.0169] |
|
| AGAR | >4cm |
0.0046 [0.0017–0.0099] |
0.0111 [0.0030–0.0245] |
0.0121 [0.0020–0.0454] |
0.0316 [0.0108–0.0671] |
| CNAT | >4cm | NA |
5.8e-04 [1.5e-05–2.5e-03] |
NA |
0.0012 [0.0002–0.0044] |
| DLAB | >4cm |
0.0012 [0.0003–0.0029] |
0.0016 [0.0003–0.0046] |
0.0039 [0.0005–0.0152] |
0.0020 [0.0005–0.0049] |
| DSTO | >4cm |
0.0222 [0.0099–0.0453] |
0.0193 [0.0080–0.0372] |
0.0037 [0.0007–0.0126] |
0.0070 [0.0026–0.0160] |
| EFAS | >4cm |
9.3e-04 [1.8e-04–2.4e-03] |
0.0080 [0.0027–0.0199] |
0.0079 [0.0012–0.0223] |
0.0040 [0.0010–0.0101] |
| FAVI |
0.0071 [0.0024–0.0146] |
0.0076 [0.0020–0.0198] |
0.0085 [0.0014–0.0304] |
0.0047 [0.0012–0.0112] |
|
| MADR | NA |
0.0015 [0.0001–0.0050] |
7.9e-04 [3.3e-05–3.8e-03] |
0.0110 [0.0032–0.0273] |
|
| MADR | >4cm |
0.0019 [0.0005–0.0052] |
0.0055 [0.0013–0.0151] |
0.0250 [0.0037–0.0806] |
0.0701 [0.0221–0.1883] |
| MCAV |
0.0485 [0.0226–0.0979] |
0.1455 [0.0645–0.3111] |
0.1128 [0.0225–0.3632] |
0.1657 [0.0581–0.3837] |
|
| MCAV | >4cm |
0.0413 [0.0198–0.0779] |
0.1208 [0.0569–0.2591] |
0.1602 [0.0316–0.5710] |
0.1572 [0.0542–0.3864] |
| MEAN |
0.0146 [0.0053–0.0296] |
0.0167 [0.0063–0.0375] |
0.0201 [0.0034–0.0737] |
0.0175 [0.0050–0.0401] |
|
| MMEA | >4cm |
0.0027 [0.0009–0.0061] |
0.0054 [0.0018–0.0144] |
0.0216 [0.0039–0.0674] |
0.0155 [0.0050–0.0356] |
| MUSS |
0.0018 [0.0004–0.0049] |
0.0019 [0.0002–0.0071] |
0.0024 [0.0002–0.0103] |
0.0073 [0.0022–0.0179] |
|
| MYCE | >4cm | NA |
0.0035 [0.0007–0.0096] |
0.0086 [0.0013–0.0284] |
0.0037 [0.0010–0.0094] |
| ORBI |
9.9e-05 [1.7e-05–3.3e-04] |
NA |
3.4e-04 [1.3e-05–1.5e-03] |
2.8e-04 [2.5e-05–9.9e-04] |
|
| ORBI | >4cm |
0.0011 [0.0004–0.0022] |
0.0046 [0.0020–0.0093] |
0.0025 [0.0005–0.0080] |
0.0039 [0.0012–0.0102] |
| PCLI | >4cm |
0.0042 [0.0016–0.0095] |
NA | NA | NA |
| PORI |
0.0562 [0.0291–0.1096] |
0.0263 [0.0087–0.0552] |
0.0306 [0.0057–0.1071] |
0.1403 [0.0480–0.2941] |
|
| PORI | >4cm |
0.1091 [0.0519–0.2242] |
0.1299 [0.0590–0.2542] |
0.1706 [0.0363–0.5922] |
0.4672 [0.1651–1.0317] |
| PSTR | >4cm |
0.0072 [0.0029–0.0142] |
0.0145 [0.0059–0.0291] |
0.0046 [0.0006–0.0171] |
0.0027 [0.0006–0.0078] |
| SIDE |
1.2117 [0.5641–2.4891] |
0.7066 [0.3160–1.4079] |
0.4837 [0.0894–1.6469] |
0.6185 [0.2322–1.2748] |
|
| SIDE | >4cm |
0.1633 [0.0873–0.3055] |
0.3378 [0.1568–0.7610] |
0.2358 [0.0456–0.7998] |
0.2546 [0.0870–0.5536] |
| SINT |
0.0539 [0.0222–0.1232] |
0.0650 [0.0269–0.1164] |
0.0844 [0.0141–0.3034] |
0.1487 [0.0535–0.3473] |
|
| SINT | >4cm |
0.0994 [0.0459–0.1920] |
0.1940 [0.0765–0.4303] |
0.2501 [0.0489–0.8755] |
0.2395 [0.0769–0.5374] |
| SOLE |
0.0062 [0.0024–0.0144] |
0.0105 [0.0032–0.0246] |
0.0036 [0.0004–0.0130] |
0.0102 [0.0031–0.0250] |
|
| SOLE | >4cm |
0.0218 [0.0098–0.0419] |
0.0171 [0.0062–0.0366] |
0.0081 [0.0015–0.0290] |
0.0066 [0.0019–0.0177] |
############
# TOTAL COLONIES IN EACH IMPACT ZONE
# Calculate total per taxclass per habitat Type and impact zone
tot1 <- left_join(summary_preds, area_summary, by = "Type") %>%
mutate(tot = .epred * total_area_m2,
tot_lower = .epred * total_area_m2,
tot_upper = .epred * total_area_m2)
tot1 %>%
group_by(Type, name) %>%
summarize(total_area_m2 = sum(total_area_m2), .groups = "drop") %>%
pivot_wider(names_from = name, values_from = total_area_m2) %>%
janitor::adorn_totals(where = c("row", "col")) %>%
mutate(across(where(is.numeric), ~ formatC(.x, format = "e", digits = 2))) %>%
knitr::kable(caption = "Total area (m2) of each habitat type in each impact zone")
| Type | Channel | Side Slopes | Depth_10cm | Depth_5cm | Depth_1cm | Depth_0_5cm | Depth_0_1cm | Nearshore | Rest of Monitoring Area | North | East | South | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Nearshore Ridge Complex | 1.17e+04 | 1.58e+05 | 1.15e+06 | 1.18e+06 | 5.71e+06 | 3.89e+06 | 9.98e+06 | 1.63e+06 | 3.21e+07 | NA | NA | NA | 5.58e+07 |
| Inner Reef | 1.35e+04 | 1.28e+04 | 3.15e+05 | 2.87e+05 | 1.29e+06 | 7.52e+05 | 2.86e+06 | NA | 1.06e+07 | NA | NA | NA | 1.62e+07 |
| Middle Reef | 7.77e+05 | 4.38e+04 | 2.60e+05 | 2.38e+05 | 1.06e+06 | 7.09e+05 | 1.33e+06 | NA | 6.87e+06 | NA | NA | NA | 1.13e+07 |
| Outer Reef | 1.48e+06 | 2.13e+04 | 2.88e+05 | 3.04e+05 | 1.20e+06 | 8.02e+05 | 1.91e+06 | NA | 1.19e+07 | 1.61e+06 | 1.01e+06 | 7.15e+05 | 2.13e+07 |
| Total | 2.28e+06 | 2.36e+05 | 2.01e+06 | 2.01e+06 | 9.26e+06 | 6.16e+06 | 1.61e+07 | 1.63e+06 | 6.15e+07 | 1.61e+06 | 1.01e+06 | 7.15e+05 | 1.04e+08 |
# Calculate total per taxclass per impact zone (sum all habitat Types)
tot2 <- tot1 %>%
group_by(taxon, class, name) %>%
summarize(tot = round(sum(tot))) %>%
arrange(name)
tot2 %>% ungroup() %>%
pivot_wider(names_from = name, values_from = tot, names_sort = FALSE) %>%
janitor::adorn_totals(where = c("row", "col")) %>%
knitr::kable(caption = "Total number of colonies of each coral taxon and size class in each impact zone")
| taxon | class | Channel | Side Slopes | Depth_10cm | Depth_5cm | Depth_1cm | Depth_0_5cm | Depth_0_1cm | Nearshore | North | East | South | Rest of Monitoring Area | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ACER | <4cm | 0 | 2 | 15 | 15 | 75 | 51 | 131 | 21 | NA | NA | NA | 421 | 731 |
| ACER | >4cm | 12 | 125 | 913 | 935 | 4528 | 3087 | 7935 | 1286 | 3 | 2 | 1 | 25535 | 44362 |
| AGAR | <4cm | 425 | 18 | 179 | 179 | 784 | 515 | 1345 | 74 | 385 | 240 | 171 | 6093 | 10408 |
| AGAR | >4cm | 2106 | 79 | 797 | 799 | 3465 | 2282 | 5854 | 290 | 1892 | 1179 | 838 | 27409 | 46990 |
| CNAT | >4cm | 64 | 1 | 19 | 20 | 80 | 51 | 146 | NA | 70 | 43 | 31 | 750 | 1275 |
| DLAB | >4cm | 227 | 16 | 133 | 131 | 594 | 393 | 982 | 76 | 120 | 75 | 53 | 4064 | 6864 |
| DSTO | >4cm | 518 | 157 | 1328 | 1333 | 6300 | 4196 | 11347 | 1393 | 420 | 262 | 186 | 39395 | 66835 |
| EFAS | >4cm | 460 | 26 | 259 | 247 | 1100 | 704 | 1919 | 58 | 238 | 149 | 106 | 8251 | 13517 |
| FAVI | <4cm | 517 | 65 | 541 | 537 | 2494 | 1656 | 4333 | 447 | 278 | 173 | 123 | 16189 | 27353 |
| MADR | <4cm | 627 | 11 | 143 | 147 | 593 | 391 | 980 | NA | 656 | 409 | 291 | 5663 | 9911 |
| MADR | >4cm | 4598 | 112 | 1150 | 1167 | 4831 | 3212 | 7582 | 122 | 4188 | 2610 | 1856 | 42185 | 73613 |
| MCAV | <4cm | 12568 | 688 | 6804 | 6706 | 29849 | 19472 | 52130 | 3045 | 9906 | 6173 | 4390 | 222287 | 374018 |
| MCAV | >4cm | 13500 | 705 | 6567 | 6444 | 28596 | 18715 | 48452 | 2592 | 9397 | 5856 | 4164 | 212058 | 357046 |
| MEAN | <4cm | 1578 | 145 | 1235 | 1227 | 5633 | 3738 | 9708 | 917 | 1047 | 653 | 464 | 37877 | 64222 |
| MMEA | >4cm | 1502 | 68 | 567 | 555 | 2436 | 1613 | 3838 | 170 | 929 | 579 | 412 | 18124 | 30793 |
| MUSS | <4cm | 475 | 21 | 202 | 205 | 901 | 600 | 1520 | 110 | 436 | 272 | 193 | 6793 | 11728 |
| MYCE | >4cm | 461 | 19 | 168 | 159 | 689 | 445 | 1084 | NA | 222 | 138 | 98 | 5330 | 8813 |
| ORBI | <4cm | 25 | 1 | 11 | 11 | 48 | 32 | 75 | 6 | 17 | 10 | 7 | 334 | 577 |
| ORBI | >4cm | 294 | 16 | 170 | 167 | 741 | 479 | 1325 | 67 | 235 | 146 | 104 | 5605 | 9349 |
| PCLI | >4cm | 2 | 26 | 187 | 192 | 931 | 635 | 1629 | 266 | NA | NA | NA | 5234 | 9102 |
| PORI | <4cm | 8659 | 517 | 4603 | 4696 | 21112 | 14172 | 35953 | 3523 | 8388 | 5227 | 3717 | 150091 | 260658 |
| PORI | >4cm | 30864 | 1385 | 13082 | 13201 | 58094 | 38610 | 97926 | 6842 | 27924 | 17402 | 12375 | 438950 | 756655 |
| PSTR | >4cm | 295 | 61 | 570 | 560 | 2614 | 1707 | 4795 | 454 | 159 | 99 | 70 | 17235 | 28619 |
| SIDE | <4cm | 49324 | 9029 | 73521 | 74067 | 348244 | 233423 | 611513 | 76014 | 36970 | 23040 | 16385 | 2184523 | 3736053 |
| SIDE | >4cm | 21272 | 1759 | 16382 | 16154 | 73561 | 48208 | 129930 | 10244 | 15218 | 9484 | 6744 | 514168 | 863124 |
| SINT | <4cm | 10742 | 620 | 5597 | 5606 | 25100 | 16661 | 42663 | 3379 | 8886 | 5537 | 3938 | 180957 | 309686 |
| SINT | >4cm | 20771 | 1312 | 11797 | 11633 | 52307 | 34424 | 89220 | 6235 | 14319 | 8923 | 6346 | 373721 | 631008 |
| SOLE | <4cm | 674 | 57 | 547 | 546 | 2487 | 1636 | 4451 | 391 | 608 | 379 | 270 | 17415 | 29461 |
| SOLE | >4cm | 620 | 160 | 1319 | 1322 | 6245 | 4167 | 11113 | 1365 | 392 | 245 | 174 | 38858 | 65980 |
| Total | - | 183180 | 17201 | 148806 | 148961 | 684432 | 455275 | 1189879 | 119387 | 143303 | 89305 | 63507 | 4605515 | 7848751 |
tot2 %>% filter(taxon == "CNAT")
## # A tibble: 11 × 4
## # Groups: taxon, class [1]
## taxon class name tot
## <chr> <chr> <fct> <dbl>
## 1 CNAT >4cm Channel 64
## 2 CNAT >4cm Side Slopes 1
## 3 CNAT >4cm Depth_10cm 19
## 4 CNAT >4cm Depth_5cm 20
## 5 CNAT >4cm Depth_1cm 80
## 6 CNAT >4cm Depth_0_5cm 51
## 7 CNAT >4cm Depth_0_1cm 146
## 8 CNAT >4cm North 70
## 9 CNAT >4cm East 43
## 10 CNAT >4cm South 31
## 11 CNAT >4cm Rest of Monitoring Area 750
# Calculate total per impact zone (sum all taxon/class)
tot3 <- tot2 %>%
group_by(name) %>%
summarize(tot = sum(tot))
tot2 %>%
mutate(ESA = if_else(taxon %in% c("ACER", "ORBI"), "ESA", "non-ESA")) %>%
group_by(name, ESA) %>%
summarize(tot = sum(tot), .groups = "drop") %>%
pivot_wider(names_from = ESA, values_from = tot) %>%
janitor::adorn_totals(where = c("row", "col")) %>%
mutate(across(where(is.numeric), ~ format(round(.), big.mark = ","))) %>%
knitr::kable(caption = "Total coral colonies in all impact zones")
| name | ESA | non-ESA | Total |
|---|---|---|---|
| Channel | 331 | 182,849 | 183,180 |
| Side Slopes | 144 | 17,057 | 17,201 |
| Depth_10cm | 1,109 | 147,697 | 148,806 |
| Depth_5cm | 1,128 | 147,833 | 148,961 |
| Depth_1cm | 5,392 | 679,040 | 684,432 |
| Depth_0_5cm | 3,649 | 451,626 | 455,275 |
| Depth_0_1cm | 9,466 | 1,180,413 | 1,189,879 |
| Nearshore | 1,380 | 118,007 | 119,387 |
| North | 255 | 143,048 | 143,303 |
| East | 158 | 89,147 | 89,305 |
| South | 112 | 63,395 | 63,507 |
| Rest of Monitoring Area | 31,895 | 4,573,620 | 4,605,515 |
| Total | 55,019 | 7,793,732 | 7,848,751 |
# STEP 1: Aggregate total abundance by taxon + class
tot4 <- tot2 %>%
group_by(taxon, class) %>%
summarize(tot = sum(tot), .groups = "drop")
# STEP 2: Set tile resolution
# tile_size <- 2500
# tile_size <- 5000
tile_size <- 1000
# STEP 3: Compute number of tiles per taxon + class
tile_totals <- tot4 %>%
group_by(taxon) %>%
summarize(total_raw = sum(tot), .groups = "drop") %>%
mutate(total_tiles = round(total_raw / tile_size))
# Join back to get size-class-specific raw totals
tile_data <- tot4 %>%
left_join(tile_totals, by = "taxon") %>%
mutate(
prop = tot / total_raw,
n_tiles = round(prop * total_tiles)
) %>%
filter(n_tiles > 0) %>%
arrange(taxon)
# STEP 4: Order taxa by total abundance (across size classes)
taxon_order <- tile_data %>%
group_by(taxon) %>%
summarize(total_n = sum(tot),
total_tiles = sum(n_tiles), .groups = "drop") %>%
arrange(total_n) %>%
mutate(taxon_row_start = cumsum(lag(total_tiles %/% 100 + 1, default = 0)))
# STEP 5: Expand to individual tiles
tile_expanded <- tile_data %>%
uncount(n_tiles) %>%
mutate(class = factor(class, levels = c(">4cm", "<4cm"))) %>%
left_join(taxon_order, by = "taxon") %>%
group_by(taxon) %>%
mutate(
tile_id = row_number(),
col = (tile_id - 1) %% 100,
row = -(taxon_row_start + (tile_id - 1) %/% 100)
) %>%
ungroup() %>%
mutate(taxon = factor(taxon, levels = taxon_order$taxon))
# STEP 6: Assign distinct shuffled Dark2 colors per taxon
taxa <- levels(tile_expanded$taxon)
n_taxa <- length(taxa)
# Generate color palette (Dark2 can go up to 8, then extended)
raw_colors <- c(
"brown", "dodgerblue2", "#E31A1C", "green4", "#6A3D9A", "#FF7F00", "black",
"gold1", "skyblue2", "maroon", "palegreen2", "#FDBF6F", "gray70",
"orchid1", "darkturquoise", "darkorange4",
"deepskyblue4", "darkolivegreen3", "firebrick3",
"mediumvioletred"
)
# Assign names
base_colors <- setNames(rev(raw_colors), taxa)
# STEP 7: Label positions
row_labels <- tile_expanded %>%
group_by(taxon) %>%
summarize(label_y = max(row), label_x_end = max(col), .groups = "drop")
# STEP 8: Taxon totals
taxon_totals <- tot4 %>%
group_by(taxon) %>%
summarize(total = sum(tot), .groups = "drop") %>%
mutate(total_fmt = format(round(total), big.mark = ","))
row_labels <- row_labels %>%
left_join(taxon_totals, by = "taxon")
# STEP 9: Plot
waffle <- ggplot(tile_expanded, aes(x = col, y = row, fill = taxon)) +
geom_tile(width = 0.95, height = 0.95, color = "white", linewidth = 0.1) +
# Dot for <4cm
geom_point(
data = filter(tile_expanded, class == "<4cm"),
aes(x = col, y = row),
shape = 16, color = "white", size = 0.5, inherit.aes = FALSE
) +
# Taxon names (left)
geom_text(
data = row_labels,
aes(x = -2, y = label_y, label = taxon),
hjust = 1, size = 2.5, inherit.aes = FALSE
) +
# Totals (right)
geom_text(
data = row_labels,
aes(x = label_x_end, y = label_y, label = total_fmt),
hjust = 0, size = 2.5, inherit.aes = FALSE
) +
scale_fill_manual(values = base_colors, guide = "none") +
coord_equal(clip = "off") + # allow text to overflow plot area
theme_void() +
theme(
plot.margin = margin(t = 10, r = 80, b = 10, l = 80), # ensure space on both sides
axis.text = element_blank()
) +
labs(
title = "Total abundance within 1.2-km of channel\n(1 tile = 1,000 corals)",
caption = "White dot = smaller corals (<4cm); solid tiles = >4cm"
)
waffle
# Assign a base color per taxon
base_colors <- scales::hue_pal()(length(unique(tot4$taxon)))
names(base_colors) <- unique(tot4$taxon)
set.seed(2)
base_colors <- sample(ggsci::pal_d3("category20")(20), 20)
names(base_colors) <- unique(tot4$taxon)
# Define shading function
get_shades <- function(base_col, class, all_classes) {
if (length(all_classes) == 2) {
if (class == "<4cm") colorspace::lighten(base_col, 0.1)
else colorspace::darken(base_col, 0.1)
} else {
base_col
}
}
# Build taxon_class and corresponding shades
shade_map <- tot4 %>%
distinct(taxon, class) %>%
rowwise() %>%
mutate(
base_col = base_colors[[taxon]],
all_classes = list(tot4$class[tot4$taxon == taxon]),
shade = get_shades(base_col, class, all_classes),
taxon_class = paste(taxon, class, sep = "_")
) %>%
ungroup()
# Final color map
fill_colors <- setNames(shade_map$shade, shade_map$taxon_class)
# Add taxon_class to original data
tot4 <- tot4 %>%
mutate(taxon_class = paste(taxon, class, sep = "_"))
# Plot with taxon labels only
p <- ggplot(tot4, aes(area = tot, fill = taxon_class, subgroup = taxon)) +
geom_treemap(color = NA) +
geom_treemap_subgroup_border(colour = "white", size = 1) +
# geom_treemap_subgroup_text(place = "topleft", grow = TRUE,
# alpha = 0.8, colour = "black", size = 8) +
scale_fill_manual(values = fill_colors) +
theme(legend.position = "none")
# 5. Get tile layout and attach taxon_class
tile_layout <- treemapify(tot4, area = "tot",
fill = "taxon_class", subgroup = "taxon")
# 6. Flag small boxes (aggregate size classes)
tile_layout_ag <- tile_layout %>%
group_by(taxon) %>%
summarize(xmin = min(xmin), xmax = max(xmax),
ymin = min(ymin), ymax = max(ymax),
area = (xmax - xmin) * (ymax - ymin),
center_x = (xmin + xmax) / 2,
center_y = (ymin + ymax) / 2)
area_threshold <- 0.001
internal_labels <- tile_layout_ag %>%
filter(area >= area_threshold)
external_labels <- tile_layout_ag %>%
filter(area < area_threshold)
# Create labels with taxontotals
taxtots <- tot4 %>%
group_by(taxon) %>%
summarize(tot = scales::comma(sum(tot)))
internal_labels <- left_join(internal_labels, taxtots) %>%
mutate(label = paste0(taxon, "\n", tot))
external_labels <- left_join(external_labels, taxtots) %>%
mutate(label = paste0(taxon, "\n", tot))
# 7. Final plot
p +
scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
coord_cartesian(clip = "off") +
geom_text(data = internal_labels,
aes(x = xmin, y = ymax-0.00001,
label = label, size = area), inherit.aes = FALSE,
hjust = 0, vjust = 1, lineheight = 0.7,
nudge_x = 0.005, nudge_y = -0.005) +
scale_size_continuous(range = c(1.1, 15)) +
geom_text_repel(data = external_labels,
aes(x = center_x, y = center_y,
label = label), inherit.aes = FALSE,
max.overlaps = Inf,
xlim = c(1, NA),
force = 5,
min.segment.length = 0.1,
segment.size = 0.2,
hjust = 0,
size = 1.1) +
theme_void() +
theme(
legend.position = "none",
plot.margin = margin(t = 0, r = 20, b = 10, l = 10),
plot.background = element_rect(fill = "white", color = NA),
) +
ggtitle("Total corals within 1.2 km of Port Everglades Channel")
ggsave(filename = "outputs/Fig3.png", width = 170, height = 130, units = "mm", dpi = 300)
# STEP 1: Aggregate total abundance by taxon + class
tot4 <- tot2 %>%
filter(name != "Rest of Monitoring Area") %>%
group_by(taxon, class) %>%
summarize(tot = sum(tot), .groups = "drop")
# STEP 2: Set tile resolution
# tile_size <- 2500
# tile_size <- 5000
tile_size <- 1000
# STEP 3: Compute number of tiles per taxon + class
tile_totals <- tot4 %>%
group_by(taxon) %>%
summarize(total_raw = sum(tot), .groups = "drop") %>%
mutate(total_tiles = round(total_raw / tile_size))
# Join back to get size-class-specific raw totals
tile_data <- tot4 %>%
left_join(tile_totals, by = "taxon") %>%
mutate(
prop = tot / total_raw,
n_tiles = round(prop * total_tiles)
) %>%
filter(n_tiles > 0) %>%
arrange(taxon)
# STEP 4: Order taxa by total abundance (across size classes)
taxon_order <- tile_data %>%
group_by(taxon) %>%
summarize(total_n = sum(tot),
total_tiles = sum(n_tiles), .groups = "drop") %>%
arrange(total_n) %>%
mutate(taxon_row_start = cumsum(lag(total_tiles %/% 100 + 1, default = 0)))
# STEP 5: Expand to individual tiles
tile_expanded <- tile_data %>%
uncount(n_tiles) %>%
mutate(class = factor(class, levels = c(">4cm", "<4cm"))) %>%
left_join(taxon_order, by = "taxon") %>%
group_by(taxon) %>%
mutate(
tile_id = row_number(),
col = (tile_id - 1) %% 100,
row = -(taxon_row_start + (tile_id - 1) %/% 100)
) %>%
ungroup() %>%
mutate(taxon = factor(taxon, levels = taxon_order$taxon))
# STEP 7: Label positions
row_labels <- tile_expanded %>%
group_by(taxon) %>%
summarize(label_y = max(row), label_x_end = max(col), .groups = "drop")
# STEP 8: Taxon totals
taxon_totals <- tot4 %>%
group_by(taxon) %>%
summarize(total = sum(tot), .groups = "drop") %>%
mutate(total_fmt = format(round(total), big.mark = ","))
row_labels <- row_labels %>%
left_join(taxon_totals, by = "taxon")
# STEP 9: Plot
waffle <- ggplot(tile_expanded, aes(x = col, y = row, fill = taxon)) +
geom_tile(width = 0.95, height = 0.95, color = "white", linewidth = 0.1) +
# Dot for <4cm
geom_point(
data = filter(tile_expanded, class == "<4cm"),
aes(x = col, y = row),
shape = 16, color = "white", size = 0.5, inherit.aes = FALSE
) +
# Taxon names (left)
geom_text(
data = row_labels,
aes(x = -2, y = label_y, label = taxon),
hjust = 1, size = 2.5, inherit.aes = FALSE
) +
# Totals (right)
geom_text(
data = row_labels,
aes(x = label_x_end, y = label_y, label = total_fmt),
hjust = 0, size = 2.5, inherit.aes = FALSE
) +
scale_fill_manual(values = base_colors, guide = "none") +
coord_equal(clip = "off") + # allow text to overflow plot area
theme_void() +
theme(
plot.margin = margin(t = 10, r = 80, b = 10, l = 80), # ensure space on both sides
axis.text = element_blank()
) +
labs(
title = "Total abundance within Scenario 2 impact zones\n(1 tile = 1,000 corals)",
caption = "White dot = smaller corals (<4cm); solid tiles = >4cm"
)
waffle
newdata_ds <- dfftaxon_trimmed %>%
distinct(dataset, Type, taxon, class) %>%
mutate(transect_area_m2 = 1,
dir = NA)
preds_ds <- add_epred_draws(
mod_nb3,
newdata = newdata_ds,
re_formula = NULL,
allow_new_levels = TRUE
)
summary_preds_ds <- preds_ds %>%
group_by(taxon, class, Type, dataset) %>%
mean_qi(.epred, .width = 0.95)
ggplot(summary_preds_ds, aes(x = Type, y = .epred,
color = dataset, shape = dataset,
group = interaction(dataset, class))) +
geom_point(position = position_dodge(width = 0.5), size = 2, alpha = 0.5) +
geom_line(alpha = 0.5) +
geom_errorbar(aes(ymin = .lower, ymax = .upper),
position = position_dodge(width = 0.5), width = 0.2, alpha = 0.5) +
facet_wrap(~ taxon + class, scales = "free_y") +
scale_y_log10()+#limits = c(1e-4, 2)) +
labs(x = "Habitat Type", y = "Estimated Density (per m²)",
color = "Size Class", shape = "Dataset") +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#TEST FOR DIFFS IN ANY TAXON/CLASS/TYPE GROUP BETWEEN SURVEYS
# 🔁 Step 1: Group by taxon, class, Type
comparison_results_all <- preds_ds %>%
group_by(taxon, class, Type) %>%
group_split() %>%
map_dfr(function(subgroup) {
# Pivot to wide format: one column per dataset
sub_wide <- subgroup %>%
select(.draw, dataset, .epred) %>%
pivot_wider(names_from = dataset, values_from = .epred)
# Skip if not enough datasets to compare
dataset_names <- names(sub_wide)[-1]
if (length(dataset_names) < 2) return(NULL)
# All pairwise comparisons
comparisons <- combn(dataset_names, 2, simplify = FALSE)
# Compute posterior differences
map_dfr(comparisons, function(pair) {
diff_draws <- sub_wide[[pair[1]]] - sub_wide[[pair[2]]]
tibble(
taxon = unique(subgroup$taxon),
class = unique(subgroup$class),
Type = unique(subgroup$Type),
dataset1 = pair[1],
dataset2 = pair[2],
mean_diff = mean(diff_draws),
lower = quantile(diff_draws, 0.025),
upper = quantile(diff_draws, 0.975),
p_gt_0 = mean(diff_draws > 0),
p_lt_0 = mean(diff_draws < 0)
)
})
})
# 👉 View significant differences (95% CI excludes 0)
comparison_results_all %>%
filter(lower > 0 | upper < 0) %>%
arrange(taxon, class, Type, desc(abs(mean_diff))) %>%
knitr::kable()
| taxon | class | Type | dataset1 | dataset2 | mean_diff | lower | upper | p_gt_0 | p_lt_0 |
|---|---|---|---|---|---|---|---|---|---|
| ORBI | >4cm | Middle Reef | nsu11_esa | dca17_esa | 0.0122602 | 0.0015630 | 0.0422382 | 1.0000000 | 0.0000000 |
| ORBI | >4cm | Outer Reef | nsu11_esa | dca17_esa | 0.0079001 | 0.0003055 | 0.0283506 | 0.9766667 | 0.0233333 |
# test for diffs in sctld-sus groups
# Step 1: Define taxon groups
group_definitions <- tibble(
taxon = unique(preds_ds$taxon),
taxon_group = case_when(
taxon %in% c("FAVI", "MUSS", "MEAN", "DSTO", "EFAS", "MMEA") ~ "high_sus",
taxon %in% c("MCAV", "ORBI", "SIDE", "SINT", "SOLE") ~ "med_sus",
taxon %in% c("ACER", "PORI", "AGAR", "MYCE", "MADR") ~ "low_sus",
TRUE ~ NA_character_
)
)
# Step 2: Join taxon groups to posterior draws
preds_grouped <- preds_ds %>%
ungroup() %>%
left_join(group_definitions, by = "taxon") %>%
filter(!is.na(taxon_group)) # drop any unmatched taxa
# Step 3: Aggregate mean density per group per draw
group_summaries <- preds_grouped %>%
group_by(.draw, taxon_group, dataset, Type, class) %>%
summarize(mean_epred = mean(.epred), .groups = "drop")
# Filter to only surveys that surveyed all taxa
group_summaries <- group_summaries %>%
filter(!str_detect(dataset, "_esa"))
# Step 4: Generate pairwise comparisons across datasets
comparisons <- group_summaries %>%
inner_join(group_summaries, by = c(".draw", "taxon_group", "Type", "class"), suffix = c("_A", "_B")) %>%
filter(dataset_A != dataset_B) %>%
mutate(
dataset_pair = paste(dataset_A, "vs", dataset_B),
diff = mean_epred_A - mean_epred_B
)
# Step 5: Summarize pairwise differences
comparison_summary <- comparisons %>%
group_by(taxon_group, Type, class, dataset_pair) %>%
summarize(
mean_diff = mean(diff),
ci_lower = quantile(diff, 0.025),
ci_upper = quantile(diff, 0.975),
p_diff_gt0 = mean(diff > 0),
p_diff_lt0 = mean(diff < 0),
.groups = "drop"
)
# View results
comparison_summary %>%
arrange(taxon_group, Type, class, desc(p_diff_gt0)) %>%
filter(p_diff_gt0 < 0.1 | p_diff_lt0 < 0.1) %>%
knitr::kable()
| taxon_group | Type | class | dataset_pair | mean_diff | ci_lower | ci_upper | p_diff_gt0 | p_diff_lt0 |
|---|---|---|---|---|---|---|---|---|
| low_sus | Middle Reef | <4cm | drm24 vs dca17 | 0.0265135 | -0.0065550 | 0.1191803 | 0.9200000 | 0.0800000 |
| low_sus | Middle Reef | <4cm | dca17 vs drm24 | -0.0265135 | -0.1191803 | 0.0065550 | 0.0800000 | 0.9200000 |
| med_sus | Middle Reef | <4cm | drm24 vs dca17 | 0.1650948 | -0.0391609 | 0.4763834 | 0.9366667 | 0.0633333 |
| med_sus | Middle Reef | <4cm | dca17 vs drm24 | -0.1650948 | -0.4763834 | 0.0391609 | 0.0633333 | 0.9366667 |
| med_sus | Middle Reef | >4cm | drm24 vs dca17 | 0.1351438 | -0.0395203 | 0.3999008 | 0.9366667 | 0.0633333 |
| med_sus | Middle Reef | >4cm | dca17 vs drm24 | -0.1351438 | -0.3999008 | 0.0395203 | 0.0633333 | 0.9366667 |
| med_sus | Outer Reef | >4cm | tt21 vs dca17 | 0.1880719 | -0.0160591 | 0.5709906 | 0.9666667 | 0.0333333 |
| med_sus | Outer Reef | >4cm | dca17 vs tt21 | -0.1880719 | -0.5709906 | 0.0160591 | 0.0333333 | 0.9666667 |
Look across all habitat Types?
# 3️⃣ Sum densities across all habitat Types
group_overall <- preds_grouped %>%
group_by(.draw, dataset, taxon_group, class) %>%
summarize(overall_density = sum(.epred), .groups = "drop") %>%
filter(!str_detect(dataset, "_esa"))
# 4️⃣ Perform pairwise comparisons across datasets
dataset_pairs <- combn(unique(group_overall$dataset), 2, simplify = FALSE)
comparison_summary <- map_dfr(dataset_pairs, function(pair) {
ds1 <- pair[1]
ds2 <- pair[2]
diffs <- group_overall %>%
filter(dataset %in% pair) %>%
pivot_wider(names_from = dataset, values_from = overall_density) %>%
mutate(
diff = !!sym(ds1) - !!sym(ds2)
) %>%
group_by(taxon_group, class) %>%
summarize(
dataset_pair = paste(ds1, "vs", ds2),
mean_diff = mean(diff, na.rm = TRUE),
ci_lower = quantile(diff, 0.025, na.rm = TRUE),
ci_upper = quantile(diff, 0.975, na.rm = TRUE),
p_diff_gt0 = mean(diff > 0, na.rm = TRUE),
p_diff_lt0 = mean(diff < 0, na.rm = TRUE),
.groups = "drop"
)
diffs
})
# 5️⃣ Filter for meaningful differences (optional)
comparison_summary %>%
filter(p_diff_gt0 > 0.9 | p_diff_lt0 > 0.9) %>%
arrange(taxon_group, class, desc(abs(mean_diff))) %>%
knitr::kable()
| taxon_group | class | dataset_pair | mean_diff | ci_lower | ci_upper | p_diff_gt0 | p_diff_lt0 |
|---|---|---|---|---|---|---|---|
| low_sus | <4cm | drm24 vs tt21 | -0.2011672 | -0.6645275 | 0.0311404 | 0.0433333 | 0.9566667 |
| low_sus | <4cm | drm24 vs tt23 | -0.1466547 | -0.4683312 | 0.0601734 | 0.0833333 | 0.9166667 |
| low_sus | >4cm | drm24 vs tt21 | -0.7317580 | -2.2725030 | 0.3211046 | 0.0633333 | 0.9366667 |
| low_sus | >4cm | drm24 vs tt23 | -0.4935543 | -1.6668211 | 0.3169745 | 0.0900000 | 0.9100000 |
individual taxon x class among datasets? (overall - across all habitat Types)
# 1️⃣ Summarize posterior draws across all habitat Types for each taxon × class × dataset × draw
overall_preds_taxon <- preds_ds %>%
group_by(.draw, dataset, taxon, class) %>%
summarize(overall_density = sum(.epred), .groups = "drop")
# 2️⃣ Get all unique dataset pairs
dataset_pairs <- combn(unique(overall_preds_taxon$dataset), 2, simplify = FALSE)
# 3️⃣ Pairwise comparisons: For each taxon × class, compare datasets
comparison_taxon_summary <- map_dfr(dataset_pairs, function(pair) {
ds1 <- pair[1]
ds2 <- pair[2]
# Reshape and compute difference
diffs <- overall_preds_taxon %>%
filter(dataset %in% pair) %>%
pivot_wider(names_from = dataset, values_from = overall_density) %>%
mutate(diff = !!sym(ds1) - !!sym(ds2)) %>%
group_by(taxon, class) %>%
summarize(
dataset_pair = paste(ds1, "vs", ds2),
mean_diff = mean(diff, na.rm = TRUE),
ci_lower = quantile(diff, 0.025, na.rm = TRUE),
ci_upper = quantile(diff, 0.975, na.rm = TRUE),
p_diff_gt0 = mean(diff > 0, na.rm = TRUE),
p_diff_lt0 = mean(diff < 0, na.rm = TRUE),
.groups = "drop"
)
diffs
})
# 4️⃣ (Optional) Filter to only show differences with high posterior support
comparison_taxon_summary %>%
filter(p_diff_gt0 > 0.95 | p_diff_lt0 > 0.95) %>%
arrange(taxon, class, desc(abs(mean_diff))) %>%
knitr::kable()
| taxon | class | dataset_pair | mean_diff | ci_lower | ci_upper | p_diff_gt0 | p_diff_lt0 |
|---|---|---|---|---|---|---|---|
| DLAB | >4cm | drm24 vs tt23 | -0.0057891 | -0.0175017 | 0.0001123 | 0.0266667 | 0.9733333 |
| DLAB | >4cm | dca17 vs drm24 | 0.0051937 | -0.0005181 | 0.0125144 | 0.9600000 | 0.0400000 |
| DLAB | >4cm | drm24 vs tt21 | -0.0050178 | -0.0164454 | 0.0006360 | 0.0433333 | 0.9566667 |
| MADR | <4cm | drm24 vs tt21 | -0.0133168 | -0.0413103 | 0.0002697 | 0.0266667 | 0.9733333 |
| MADR | <4cm | drm24 vs tt23 | -0.0111352 | -0.0336508 | -0.0004019 | 0.0200000 | 0.9800000 |
| MYCE | >4cm | tt21 vs tt23 | -0.0125304 | -0.0366937 | 0.0009519 | 0.0300000 | 0.9700000 |
| ORBI | >4cm | dca17 vs nsu11_esa | -0.0298381 | -0.0709313 | -0.0101321 | 0.0000000 | 1.0000000 |
| ORBI | >4cm | dca17_esa vs nsu11_esa | -0.0288890 | -0.0698132 | -0.0086604 | 0.0000000 | 1.0000000 |
| ORBI | >4cm | nsu11_esa vs tt21 | 0.0263938 | 0.0061017 | 0.0669171 | 0.9966667 | 0.0033333 |
| ORBI | >4cm | drm24 vs nsu11_esa | -0.0246130 | -0.0592024 | -0.0014104 | 0.0200000 | 0.9800000 |